diff --git a/src/bin/jbc2mpl b/src/bin/jbc2mpl index 08574f1cc75e2325c1e041dbc68ddea59d9052cd..06cd0d6138d0b4ce698d49fa45d1e3fd197da9c4 100755 Binary files a/src/bin/jbc2mpl and b/src/bin/jbc2mpl differ diff --git a/src/bin/maple b/src/bin/maple index bf36b16beff6c70e3dc45a68daa15ef2586a5f7f..c2f1eb2811a3d68fe187aa0adb9e0fe34288ecc4 100755 Binary files a/src/bin/maple and b/src/bin/maple differ diff --git a/src/maple_be/BUILD.gn b/src/maple_be/BUILD.gn index eda2d6f2631ee059619174b97392616faefac894..256adece139ffc2b86bb82ab182092708365bc59 100644 --- a/src/maple_be/BUILD.gn +++ b/src/maple_be/BUILD.gn @@ -52,13 +52,18 @@ src_libcgaarch64 = [ "src/cg/aarch64/aarch64_abi.cpp", "src/cg/aarch64/mpl_atomic.cpp", "src/cg/aarch64/aarch64_cgfunc.cpp", + "src/cg/aarch64/aarch64_dependence.cpp", + "src/cg/aarch64/aarch64_ebo.cpp", "src/cg/aarch64/aarch64_emitter.cpp", + "src/cg/aarch64/aarch64_fixshortbranch.cpp", + "src/cg/aarch64/aarch64_global.cpp", "src/cg/aarch64/aarch64_proepilog.cpp", "src/cg/aarch64/aarch64_immediate.cpp", "src/cg/aarch64/aarch64_operand.cpp", "src/cg/aarch64/aarch64_color_ra.cpp", "src/cg/aarch64/aarch64_reg_alloc.cpp", "src/cg/aarch64/aarch64_cg.cpp", + "src/cg/aarch64/aarch64_ico.cpp", "src/cg/aarch64/aarch64_insn.cpp", "src/cg/aarch64/aarch64_isa.cpp", "src/cg/aarch64/aarch64_memlayout.cpp", @@ -66,26 +71,41 @@ src_libcgaarch64 = [ "src/cg/aarch64/aarch64_live.cpp", "src/cg/aarch64/aarch64_yieldpoint.cpp", "src/cg/aarch64/aarch64_offset_adjust.cpp", + "src/cg/aarch64/aarch64_optimize_common.cpp", + "src/cg/aarch64/aarch64_peep.cpp", + "src/cg/aarch64/aarch64_reaching.cpp", + "src/cg/aarch64/aarch64_schedule.cpp", + "src/cg/aarch64/aarch64_strldr.cpp", ] src_libcg = [ + "src/cg/args.cpp", + "src/cg/cfgo.cpp", "src/cg/cfi.cpp", "src/cg/cg.cpp", "src/cg/cgbb.cpp", "src/cg/cgfunc.cpp", - "src/cg/proepilog.cpp", - "src/cg/args.cpp", - "src/cg/live.cpp", "src/cg/cg_cfg.cpp", - "src/cg/eh_func.cpp", - "src/cg/emit.cpp", "src/cg/cg_option.cpp", "src/cg/cg_phasemanager.cpp", + "src/cg/ebo.cpp", + "src/cg/eh_func.cpp", + "src/cg/emit.cpp", + "src/cg/global.cpp", + "src/cg/live.cpp", + "src/cg/ico.cpp", + "src/cg/peep.cpp", + "src/cg/label_creation.cpp", "src/cg/loop.cpp", "src/cg/memlayout.cpp", - "src/cg/yieldpoint.cpp", - "src/cg/label_creation.cpp", "src/cg/offset_adjust.cpp", + "src/cg/optimize_common.cpp", + "src/cg/pressure.cpp", + "src/cg/proepilog.cpp", + "src/cg/reaching.cpp", + "src/cg/schedule.cpp", + "src/cg/strldr.cpp", + "src/cg/yieldpoint.cpp", ] deps_libcg = [] diff --git a/src/maple_be/include/ad/mad.h b/src/maple_be/include/ad/mad.h index e86f00483dc1624c95cb1d2e5051382c4d3b3a51..42393e58b268d2e027c90f5195e5cb6cd48915ff 100644 --- a/src/maple_be/include/ad/mad.h +++ b/src/maple_be/include/ad/mad.h @@ -32,6 +32,11 @@ enum UnitType : maple::uint8 { KUnitTypeNone }; +enum RealUnitKind : maple::uint32 { + kUnitKindUndef, +#include "target/mplad_unit_kind.def" + kUnitKindLast = 13 +}; enum SlotType : maple::uint8 { kSlotNone, @@ -67,7 +72,6 @@ class Unit { std::string GetName() const; bool IsFree(maple::uint32 cycle) const; void Occupy(const Insn &insn, maple::uint32 cycle); - unsigned int GetUnitTypeNum() const; void Release(); void AdvanceCycle(); void Dump(int indent = 0) const; @@ -220,25 +224,25 @@ class MAD { class AluShiftBypass : public Bypass { public: AluShiftBypass(LatencyType d, LatencyType u, int l) : Bypass(d, u, l) {} - ~AluShiftBypass() = default; + ~AluShiftBypass() override = default; - bool CanBypass(const Insn &defInsn, const Insn &useInsn) const; 
+ bool CanBypass(const Insn &defInsn, const Insn &useInsn) const override; }; class AccumulatorBypass : public Bypass { public: AccumulatorBypass(LatencyType d, LatencyType u, int l) : Bypass(d, u, l) {} - ~AccumulatorBypass() = default; + ~AccumulatorBypass() override = default; - bool CanBypass(const Insn &defInsn, const Insn &useInsn) const; + bool CanBypass(const Insn &defInsn, const Insn &useInsn) const override; }; class StoreBypass : public Bypass { public: StoreBypass(LatencyType d, LatencyType u, int l) : Bypass(d, u, l) {} - ~StoreBypass() = default; + ~StoreBypass() override = default; - bool CanBypass(const Insn &defInsn, const Insn &useInsn) const; + bool CanBypass(const Insn &defInsn, const Insn &useInsn) const override; }; } /* namespace maplebe */ diff --git a/src/maple_be/include/ad/target/mplad_unit_kind.def b/src/maple_be/include/ad/target/mplad_unit_kind.def new file mode 100644 index 0000000000000000000000000000000000000000..0c56044925a600296d4fbf96bbd2944e8d1d7780 --- /dev/null +++ b/src/maple_be/include/ad/target/mplad_unit_kind.def @@ -0,0 +1,28 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +/* cortex_a55 function unit ID definition: */ +kUnitKindSlot0 = 1, +kUnitKindAgen = 2, +kUnitKindHazard = 4, +kUnitKindCrypto = 8, +kUnitKindMul = 16, +kUnitKindDiv = 32, +kUnitKindBranch = 64, +kUnitKindStAgu = 128, +kUnitKindLdAgu = 256, +kUnitKindFpAlu = 512, +kUnitKindFpMul = 1024, +kUnitKindFpDiv = 2048, + diff --git a/src/maple_be/include/cg/aarch64/aarch64_cgfunc.h b/src/maple_be/include/cg/aarch64/aarch64_cgfunc.h index c99eb86470a5e9b90d0c9c9b310d12fe4ca2462f..319bfa89892a5cecbf51af21dd542734b4451689 100644 --- a/src/maple_be/include/cg/aarch64/aarch64_cgfunc.h +++ b/src/maple_be/include/cg/aarch64/aarch64_cgfunc.h @@ -21,6 +21,8 @@ #include "aarch64_operand.h" #include "aarch64_insn.h" #include "aarch64_memlayout.h" +#include "aarch64_optimize_common.h" + namespace maplebe { class AArch64CGFunc : public CGFunc { public: @@ -457,6 +459,9 @@ class AArch64CGFunc : public CGFunc { CreateCfiImmOperand(val, size)); } + InsnVisitor *NewInsnModifier() override { + return memPool->New(*this); + } private: enum RelationOperator : uint8 { diff --git a/src/maple_be/include/cg/aarch64/aarch64_dependence.h b/src/maple_be/include/cg/aarch64/aarch64_dependence.h new file mode 100644 index 0000000000000000000000000000000000000000..c1f805c60bbec34fb86d2dfcddeb155fbd8f065c --- /dev/null +++ b/src/maple_be/include/cg/aarch64/aarch64_dependence.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. 
+ * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_DEPENDENCE_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_DEPENDENCE_H + +#include "dependence.h" +#include "cgfunc.h" +#include "aarch64_operand.h" + +namespace maplebe { +class AArch64DepAnalysis : public DepAnalysis { + public: + AArch64DepAnalysis(CGFunc &func, MemPool &mp, MAD &mad, bool beforeRA); + + ~AArch64DepAnalysis() override = default; + + void Run(BB &bb, MapleVector &nodes) override; + const std::string &GetDepTypeName(DepType depType) const override; + void DumpDepNode(DepNode &node) const override; + void DumpDepLink(DepLink &link, const DepNode *node) const override; + + protected: + void Init(BB &bb, MapleVector &nodes) override; + void ClearAllDepData() override; + void AnalysisAmbiInsns(BB &bb) override; + void AppendRegUseList(Insn &insn, regno_t regNO) override; + void AddDependence(DepNode& fromNode, DepNode &toNode, DepType depType) override; + void RemoveSelfDeps(Insn &insn) override; + void CombineClinit(DepNode &firstNode, DepNode &secondNode, bool isAcrossSeparator) override; + void CombineDependence(DepNode &firstNode, DepNode &secondNode, bool isAcrossSeparator, + bool isMemCombine = false) override; + void CombineMemoryAccessPair(DepNode &firstNode, DepNode &secondNode, bool useFirstOffset) override; + void BuildDepsUseReg(Insn &insn, regno_t regNO) override; + void BuildDepsDefReg(Insn &insn, regno_t regNO) override; + void BuildDepsAmbiInsn(Insn &insn) override; + void BuildDepsMayThrowInsn(Insn &insn) override; + bool NeedBuildDepsMem(const AArch64MemOperand &memOpnd, const AArch64MemOperand *nextMemOpnd, Insn &memInsn) const; + void BuildDepsUseMem(Insn &insn, MemOperand &memOpnd) override; + void BuildDepsDefMem(Insn &insn, MemOperand &memOpnd) override; + void BuildAntiDepsDefStackMem(Insn &insn, const AArch64MemOperand &memOpnd, const AArch64MemOperand *nextMemOpnd); + void BuildOutputDepsDefStackMem(Insn &insn, const AArch64MemOperand &memOpnd, const AArch64MemOperand *nextMemOpnd); + void BuildDepsMemBar(Insn &insn) override; + void BuildDepsSeparator(DepNode &newSepNode, MapleVector &nodes) override; + void BuildDepsControlAll(DepNode &depNode, const MapleVector &nodes) override; + void BuildDepsAccessStImmMem(Insn &insn, bool isDest) override; + void BuildCallerSavedDeps(Insn &insn) override; + void BuildDepsBetweenControlRegAndCall(Insn &insn, bool isDest) override; + void BuildStackPassArgsDeps(Insn &insn) override; + void BuildDepsDirtyStack(Insn &insn) override; + void BuildDepsUseStack(Insn &insn) override; + void BuildDepsDirtyHeap(Insn &insn) override; + DepNode *BuildSeparatorNode() override; + bool IfInAmbiRegs(regno_t regNO) const override; + bool IsFrameReg(const RegOperand&) const override; + + private: + AArch64MemOperand *GetNextMemOperand(Insn &insn, AArch64MemOperand &aarchMemOpnd) const; + void BuildMemOpndDependency(Insn &insn, Operand &opnd, const AArch64OpndProp ®Prop); + void BuildOpndDependency(Insn &insn); + void BuildSpecialInsnDependency(Insn &insn, DepNode &depNode, const MapleVector &nodes); + void SeperateDependenceGraph(MapleVector &nodes, uint32 &nodeSum); + DepNode *GenerateDepNode(Insn &insn, MapleVector &nodes, int32 
nodeSum, const MapleVector &comments); + void BuildAmbiInsnDependency(Insn &insn); + void BuildMayThrowInsnDependency(Insn &insn); + void UpdateRegUseAndDef(Insn &insn, DepNode &depNode); + void UpdateStackAndHeapDependency(DepNode &depNode, Insn &insn, const Insn &locInsn); + AArch64MemOperand *BuildNextMemOperandByByteSize(AArch64MemOperand &aarchMemOpnd, uint32 byteSize) const; + void AddDependence4InsnInVectorByType(MapleVector &insns, Insn &insn, const DepType &type); + void AddDependence4InsnInVectorByTypeAndCmp(MapleVector &insns, Insn &insn, const DepType &type); + void ReplaceDepNodeWithNewInsn(DepNode &firstNode, DepNode &secondNode, Insn& newInsn, bool isFromClinit) const; + void ClearDepNodeInfo(DepNode &depNode) const; + + Insn **regDefs = nullptr; + RegList **regUses = nullptr; + Insn *memBarInsn = nullptr; + bool hasAmbiRegs = false; + Insn *lastCallInsn = nullptr; + uint32 separatorIndex = 0; + Insn *lastFrameDef = nullptr; + MapleVector useRegnos; + MapleVector defRegnos; + MapleVector stackUses; + MapleVector stackDefs; + MapleVector heapUses; + MapleVector heapDefs; + MapleVector mayThrows; + /* instructions that can not across may throw instructions. */ + MapleVector ambiInsns; + /* register number that catch bb and cleanup bb uses. */ + MapleSet ehInRegs; +}; +} + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_DEPENDENCE_H */ \ No newline at end of file diff --git a/src/maple_be/include/cg/aarch64/aarch64_ebo.h b/src/maple_be/include/cg/aarch64/aarch64_ebo.h new file mode 100644 index 0000000000000000000000000000000000000000..f785cac5f21f4f145e0ca2f72a3566106c4e21c6 --- /dev/null +++ b/src/maple_be/include/cg/aarch64/aarch64_ebo.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_EBO_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_EBO_H + +#include "ebo.h" +#include "aarch64_operand.h" +#include "aarch64_cgfunc.h" + +namespace maplebe { +using namespace maple; + +class AArch64Ebo : public Ebo { + public: + AArch64Ebo(CGFunc &func, MemPool &memPool, LiveAnalysis *live, bool before, const std::string &phase) + : Ebo(func, memPool, live, before, phase), + callerSaveRegTable(eboAllocator.Adapter()) { + a64CGFunc = static_cast(cgFunc); + } + + ~AArch64Ebo() override = default; + + protected: + MapleVector callerSaveRegTable; + AArch64CGFunc *a64CGFunc; + int32 GetOffsetVal(const MemOperand &mem) const override; + OpndInfo *OperandInfoDef(BB ¤tBB, Insn ¤tInsn, Operand &localOpnd) override; + const RegOperand &GetRegOperand(const Operand &opnd) const override; + bool IsGlobalNeeded(Insn &insn) const override; + bool OperandEqSpecial(const Operand &op1, const Operand &op2) const override; + bool DoConstProp(Insn &insn, uint32 i, Operand &opnd) override; + bool DoConstantFold(Insn &insn, const MapleVector &opnds) override; + bool ConstantOperand(Insn &insn, const MapleVector &opnds, const MapleVector &opndInfo) override; + void BuildCallerSaveRegisters() override; + void DefineCallerSaveRegisters(InsnInfo &insnInfo) override; + void DefineReturnUseRegister(Insn &insn) override; + void DefineCallUseSpecialRegister(Insn &insn) override; + void DefineClinitSpecialRegisters(InsnInfo &insnInfo) override; + bool SpecialSequence(Insn &insn, const MapleVector &origInfos) override; + bool IsMovToSIMDVmov(Insn &insn, const Insn &replaceInsn) const override; + bool ChangeLdrMop(Insn &insn, const Operand &opnd) const override; + bool IsAdd(const Insn &insn) const override; + bool IsFmov(const Insn &insn) const override; + bool IsClinitCheck(const Insn &insn) const override; + bool IsLastAndBranch(BB &bb, Insn &insn) const override; + bool ResIsNotDefAndUse(Insn &insn) const override; + + private: + /* The number of elements in callerSaveRegTable must less then 45. */ + static constexpr int32 kMaxCallerSaveReg = 45; + bool IsZeroRegister(const Operand &opnd) const; + bool CheckCondCode(const CondOperand &cond) const; + AArch64CC_t GetReverseCond(const CondOperand &cond) const; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_EBO_H */ diff --git a/src/maple_be/include/cg/aarch64/aarch64_fixshortbranch.h b/src/maple_be/include/cg/aarch64/aarch64_fixshortbranch.h new file mode 100644 index 0000000000000000000000000000000000000000..e493596c3a4ace637a4f64861e431a4ba8da7cd6 --- /dev/null +++ b/src/maple_be/include/cg/aarch64/aarch64_fixshortbranch.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_FIXSHORTBRANCH_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_FIXSHORTBRANCH_H + +#include +#include "aarch64_cg.h" +#include "optimize_common.h" +#include "mir_builder.h" + +namespace maplebe { +class AArch64FixShortBranch { + public: + explicit AArch64FixShortBranch(CGFunc *cf) : cgFunc(cf) { + cg = cgFunc->GetCG(); + } + ~AArch64FixShortBranch() = default; + void FixShortBranches(); + + private: + CGFunc *cgFunc; + CG *cg; + bool DistanceCheck(const BB &bb, LabelIdx targLabIdx, uint32 targId); + void SetInsnId(); +}; /* class AArch64ShortBranch */ + +CGFUNCPHASE(CgFixShortBranch, "fixshortbranch") +} /* namespace maplebe */ +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_FIXSHORTBRANCH_H */ diff --git a/src/maple_be/include/cg/aarch64/aarch64_global.h b/src/maple_be/include/cg/aarch64/aarch64_global.h new file mode 100644 index 0000000000000000000000000000000000000000..7295a006849ae46d6da42f200bea4056b7c79c8e --- /dev/null +++ b/src/maple_be/include/cg/aarch64/aarch64_global.h @@ -0,0 +1,266 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_GLOBAL_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_GLOBAL_H + +#include "global.h" +#include "aarch64_operand.h" + +namespace maplebe { +using namespace maple; + +class AArch64GlobalOpt : public GlobalOpt { + public: + explicit AArch64GlobalOpt(CGFunc &func) : GlobalOpt(func) {} + ~AArch64GlobalOpt() override = default; + void Run() override; +}; + +class OptimizeManager { + public: + explicit OptimizeManager(CGFunc &cgFunc) : cgFunc(cgFunc) {} + ~OptimizeManager() = default; + template + void Optimize() { + OptimizePattern optPattern(cgFunc); + optPattern.Run(); + } + private: + CGFunc &cgFunc; +}; + +class OptimizePattern { + public: + explicit OptimizePattern(CGFunc &cgFunc) : cgFunc(cgFunc) {} + virtual ~OptimizePattern() = default; + virtual bool CheckCondition(Insn &insn) = 0; + virtual void Optimize(Insn &insn) = 0; + virtual void Run() = 0; + bool OpndDefByOne(Insn &insn, int32 useIdx) const; + bool OpndDefByZero(Insn &insn, int32 useIdx) const; + bool OpndDefByOneOrZero(Insn &insn, int32 useIdx) const; + void ReplaceAllUsedOpndWithNewOpnd(const InsnSet &useInsnSet, uint32 regNO, + Operand &newOpnd, bool updateInfo) const; + + static bool InsnDefOne(Insn &insn); + static bool InsnDefZero(Insn &insn); + static bool InsnDefOneOrZero(Insn &insn); + protected: + virtual void Init() = 0; + CGFunc &cgFunc; +}; + +/* + * Do Forward prop when insn is mov + * mov xx, x1 + * ... // BBs and x1 is live + * mOp yy, xx + * + * => + * mov x1, x1 + * ... 
// BBs and x1 is live + * mOp yy, x1 + */ +class ForwardPropPattern : public OptimizePattern { + public: + explicit ForwardPropPattern(CGFunc &cgFunc) : OptimizePattern(cgFunc) {} + ~ForwardPropPattern() override = default; + bool CheckCondition(Insn &insn) final; + void Optimize(Insn &insn) final; + void Run() final; + + protected: + void Init() final; + private: + InsnSet firstRegUseInsnSet; + std::set modifiedBB; +}; + +/* + * Do back propagation of vreg/preg when encountering the following insn: + * + * mov vreg/preg1, vreg2 + * + * back propagate reg1 to all vreg2's use points and def points, when all of them are in the same bb + */ +class BackPropPattern : public OptimizePattern { + public: + explicit BackPropPattern(CGFunc &cgFunc) : OptimizePattern(cgFunc) {} + ~BackPropPattern() override = default; + bool CheckCondition(Insn &insn) final; + void Optimize(Insn &insn) final; + void Run() final; + + protected: + void Init() final; + + private: + bool CheckAndGetOpnd(Insn &insn); + bool DestOpndHasUseInsns(Insn &insn); + bool DestOpndLiveOutToEHSuccs(Insn &insn); + bool CheckSrcOpndDefAndUseInsns(Insn &insn); + bool CheckPredefineInsn(Insn &insn); + bool CheckRedefineInsn(Insn &insn); + RegOperand *firstRegOpnd = nullptr; + RegOperand *secondRegOpnd = nullptr; + uint32 firstRegNO = 0; + uint32 secondRegNO = 0; + InsnSet srcOpndUseInsnSet; + Insn *defInsnForSecondOpnd = nullptr; +}; + +/* + * when w0 has only one valid bit, these transformations will be done + * cmp w0, #0 + * cset w1, NE --> mov w1, w0 + * + * cmp w0, #0 + * cset w1, EQ --> eor w1, w0, 1 + * + * cmp w0, #1 + * cset w1, NE --> eor w1, w0, 1 + * + * cmp w0, #1 + * cset w1, EQ --> mov w1, w0 + * + * cmp w0, #0 + * cset w0, NE -->null + * + * cmp w0, #1 + * cset w0, EQ -->null + * + * condition: + * 1. the first operand of cmp instruction must have only one valid bit + * 2. the second operand of cmp instruction must be 0 or 1 + * 3. flag register of cmp instruction must not be used later + */ +class CmpCsetPattern : public OptimizePattern { + public: + explicit CmpCsetPattern(CGFunc &cgFunc) : OptimizePattern(cgFunc) {} + ~CmpCsetPattern() override = default; + bool CheckCondition(Insn &insn) final; + void Optimize(Insn &insn) final; + void Run() final; + + protected: + void Init() final; + + private: + Insn *nextInsn = nullptr; + int64 cmpConstVal = 0; + Operand *cmpFirstOpnd = nullptr; + Operand *cmpSecondOpnd = nullptr; + Operand *csetFirstOpnd = nullptr; +}; + +/* + * mov w5, #1 + * ... --> cset w5, NE + * mov w0, #0 + * csel w5, w5, w0, NE + * + * mov w5, #0 + * ... --> cset w5,EQ + * mov w0, #1 + * csel w5, w5, w0, NE + * + * condition: + * 1. all define points of w5 are defined by: mov w5, #1(#0) + * 2. all define points of w0 are defined by: mov w0, #0(#1) + * 3. w0 will not be used after: csel w5, w5, w0, NE(EQ) + */ +class CselPattern : public OptimizePattern { + public: + explicit CselPattern(CGFunc &cgFunc) : OptimizePattern(cgFunc) {} + ~CselPattern() override = default; + bool CheckCondition(Insn &insn) final; + void Optimize(Insn &insn) final; + void Run() final; + + protected: + void Init() final {} + + private: + AArch64CC_t GetInverseCondCode(const CondOperand &cond) const; +}; + +/* + * uxtb w0, w0 --> null + * uxth w0, w0 --> null + * + * condition: + * 1. validbits(w0)<=8,16,32 + * 2. the first operand is the same as the second operand + * + * uxtb w0, w1 --> null + * uxth w0, w1 --> null + * + * condition: + * 1. validbits(w1)<=8,16,32 + * 2. 
the use points of w0 has only one define point, that is uxt w0, w1 + */ +class RedundantUxtPattern : public OptimizePattern { + public: + explicit RedundantUxtPattern(CGFunc &cgFunc) : OptimizePattern(cgFunc) {} + ~RedundantUxtPattern() override = default; + bool CheckCondition(Insn &insn) final; + void Optimize(Insn &insn) final; + void Run() final; + + protected: + void Init() final; + + private: + uint32 GetMaximumValidBit(Insn &insn, uint8 udIdx, InsnSet &insnChecked) const; + static uint32 GetInsnValidBit(Insn &insn); + InsnSet useInsnSet; + uint32 firstRegNO = 0; + Operand *secondOpnd = nullptr; +}; + +/* + * bl MCC_NewObj_flexible_cname bl MCC_NewObj_flexible_cname + * mov x21, x0 // [R203] + * str x0, [x29,#16] // local var: Reg0_R6340 [R203] --> str x0, [x29,#16] // local var: Reg0_R6340 [R203] + * ... (has call) ... (has call) + * mov x2, x21 // use of x21 ldr x2, [x29, #16] + * bl *** bl *** + */ +class LocalVarSaveInsnPattern : public OptimizePattern { + public: + explicit LocalVarSaveInsnPattern(CGFunc &cgFunc) : OptimizePattern(cgFunc) {} + ~LocalVarSaveInsnPattern() override = default; + bool CheckCondition(Insn &insn) final; + void Optimize(Insn &insn) final; + void Run() final; + + protected: + void Init() final; + + private: + bool CheckFirstInsn(Insn &firstInsn); + bool CheckSecondInsn(); + bool CheckAndGetUseInsn(Insn &firstInsn); + bool CheckLiveRange(Insn &firstInsn); + Operand *firstInsnSrcOpnd = nullptr; + Operand *firstInsnDestOpnd = nullptr; + Operand *secondInsnSrcOpnd = nullptr; + Operand *secondInsnDestOpnd = nullptr; + Insn *useInsn = nullptr; + Insn *secondInsn = nullptr; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_GLOBAL_H */ \ No newline at end of file diff --git a/src/maple_be/include/cg/aarch64/aarch64_ico.h b/src/maple_be/include/cg/aarch64/aarch64_ico.h new file mode 100644 index 0000000000000000000000000000000000000000..a6616ae810bf5509d885159b57806484cce641c9 --- /dev/null +++ b/src/maple_be/include/cg/aarch64/aarch64_ico.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ICO_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ICO_H +#include "ico.h" +#include "aarch64_isa.h" +#include "optimize_common.h" +#include "live.h" + +namespace maplebe { +class AArch64IfConversionOptimizer : public IfConversionOptimizer { + public: + AArch64IfConversionOptimizer(CGFunc &func, MemPool &memPool) : IfConversionOptimizer(func, memPool) {} + + ~AArch64IfConversionOptimizer() override = default; + void InitOptimizePatterns() override; +}; + +/* If-Then-Else pattern */ +class AArch64ICOPattern : public ICOPattern { + public: + explicit AArch64ICOPattern(CGFunc &func) : ICOPattern(func) {} + ~AArch64ICOPattern() override = default; + protected: + bool DoOpt(BB &cmpBB, BB *ifBB, BB *elseBB, BB &joinBB) override; + AArch64CC_t Encode(MOperator mOp, bool inverse) const; + Insn *BuildCondSet(const Insn &branch, RegOperand ®, bool inverse); + Insn *BuildCondSel(const Insn &branch, MOperator mOp, RegOperand &dst, RegOperand &src1, RegOperand &src2); + Insn *BuildCmpInsn(const Insn &condBr); + bool IsSetInsn(const Insn &insn, Operand *&dest, Operand *&src) const; + bool BuildCondMovInsn(BB &cmpBB, const BB &bb, const std::map &ifDestSrcMap, + const std::map &elseDestSrcMap, bool elseBBIsProcessed, + std::vector &generateInsn); + void GenerateInsnForImm(const Insn &branchInsn, Operand &ifDest, Operand &elseDest, RegOperand &destReg, + std::vector &generateInsn); + Operand *GetDestReg(const std::map &destSrcMap, const RegOperand &destReg) const; + void GenerateInsnForReg(const Insn &branchInsn, Operand &ifDest, Operand &elseDest, RegOperand &destReg, + std::vector &generateInsn); + RegOperand *GenerateRegAndTempInsn(Operand &dest, const RegOperand &destReg, std::vector &generateInsn); + bool CheckModifiedRegister(Insn &insn, std::map &destSrcMap, Operand &src, + Operand &dest) const; + bool CheckCondMoveBB(BB *bb, std::map &destSrcMap, + std::vector &destRegs, Operand *flagReg) const; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ICO_H */ diff --git a/src/maple_be/include/cg/aarch64/aarch64_optimize_common.h b/src/maple_be/include/cg/aarch64/aarch64_optimize_common.h new file mode 100644 index 0000000000000000000000000000000000000000..5ed90dafd86a7dd61cfb0ddd6c84f55c420fffec --- /dev/null +++ b/src/maple_be/include/cg/aarch64/aarch64_optimize_common.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_OPTIMIZE_COMMON_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_OPTIMIZE_COMMON_H + +#include "aarch64_isa.h" +#include "optimize_common.h" + +namespace maplebe { +using namespace maple; + + +class AArch64InsnVisitor : public InsnVisitor { + public: + explicit AArch64InsnVisitor(CGFunc &func) : InsnVisitor(func) {} + + ~AArch64InsnVisitor() override = default; + + void ModifyJumpTarget(maple::LabelIdx targetLabel, BB &bb) override; + void ModifyJumpTarget(Operand &targetOperand, BB &bb) override; + void ModifyJumpTarget(BB &newTarget, BB &bb) override; + /* Check if it requires to add extra gotos when relocate bb */ + MOperator FlipConditionOp(MOperator flippedOp, int &targetIdx) override; + Insn *CloneInsn(Insn &originalInsn) override; + LabelIdx GetJumpLabel(const Insn &insn) const override; + bool IsCompareInsn(const Insn &insn) const override; + bool IsCompareAndBranchInsn(const Insn &insn) const override; + RegOperand *CreateVregFromReg(const RegOperand &pReg) override; + + private: + int GetJumpTargetIdx(const Insn &insn) const; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_OPTIMIZE_COMMON_H */ \ No newline at end of file diff --git a/src/maple_be/include/cg/aarch64/aarch64_peep.h b/src/maple_be/include/cg/aarch64/aarch64_peep.h new file mode 100644 index 0000000000000000000000000000000000000000..3a95dabff1ae1c24ef9d9e6844f10f2ebb6e6ef0 --- /dev/null +++ b/src/maple_be/include/cg/aarch64/aarch64_peep.h @@ -0,0 +1,698 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_PEEP_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_PEEP_H + +#include +#include "peep.h" +#include "aarch64_cg.h" +#include "optimize_common.h" +#include "mir_builder.h" + +namespace maplebe { +/* + * Looking for identical mem insn to eliminate. + * If two back-to-back is: + * 1. str + str + * 2. str + ldr + * And the [MEM] is pattern of [base + offset] + * 1. The [MEM] operand is exactly same then first + * str can be eliminate. + * 2. The [MEM] operand is exactly same and src opnd + * of str is same as the dest opnd of ldr then + * ldr can be eliminate + */ +class RemoveIdenticalLoadAndStoreAArch64 : public PeepPattern { + public: + explicit RemoveIdenticalLoadAndStoreAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~RemoveIdenticalLoadAndStoreAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + + private: + bool IsMemOperandsIdentical(const Insn &insn1, const Insn &insn2) const; +}; + +/* Remove redundant mov which src and dest opnd is exactly same */ +class RemoveMovingtoSameRegAArch64 : public PeepPattern { + public: + explicit RemoveMovingtoSameRegAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~RemoveMovingtoSameRegAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* Combining 2 STRs into 1 stp or 2 LDRs into 1 ldp, when they are + * back to back and the [MEM] they access is conjointed. 
+ */ +class CombineContiLoadAndStoreAArch64 : public PeepPattern { + public: + explicit CombineContiLoadAndStoreAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~CombineContiLoadAndStoreAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* Eliminate the sxt[b|h|w] w0, w0, when w0 satisfies the following: + * i) mov w0, #imm (#imm is not out of range) + * ii) ldrs[b|h] w0, [MEM] + */ +class EliminateSpecifcSXTAArch64 : public PeepPattern { + public: + explicit EliminateSpecifcSXTAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~EliminateSpecifcSXTAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* Eliminate the uxt[b|h|w] w0, w0, when w0 satisfies the following: + * i) mov w0, #imm (#imm is not out of range) + * ii) mov w0, R0 (it is the return value of a call and the return size is not out of range) + * iii) w0 is defined and used by a special load insn and uxt[] pattern + */ +class EliminateSpecifcUXTAArch64 : public PeepPattern { + public: + explicit EliminateSpecifcUXTAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~EliminateSpecifcUXTAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* fmov ireg1 <- freg1 previous insn + * fmov ireg2 <- freg1 current insn + * use ireg2 may or may not be present + * => + * fmov ireg1 <- freg1 previous insn + * mov ireg2 <- ireg1 current insn + * use ireg1 may or may not be present + */ +class FmovRegAArch64 : public PeepPattern { + public: + explicit FmovRegAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~FmovRegAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* cbnz x0, labelA + * mov x0, 0 + * b return-bb + * labelA: + * => + * cbz x0, return-bb + * labelA: + */ +class CbnzToCbzAArch64 : public PeepPattern { + public: + explicit CbnzToCbzAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~CbnzToCbzAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* When a load follows a load or a store, and the [MEM] is + * exactly the same, optimize them. + */ +class ContiLDRorSTRToSameMEMAArch64 : public PeepPattern { + public: + explicit ContiLDRorSTRToSameMEMAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ContiLDRorSTRToSameMEMAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * Remove the following patterns: + * mov x1, x0 + * bl MCC_IncDecRef_NaiveRCFast + */ +class RemoveIncDecRefAArch64 : public PeepPattern { + public: + explicit RemoveIncDecRefAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~RemoveIncDecRefAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * When GCONLY is enabled, the read barriers can be inlined.
+ * we optimize it with the following pattern: + * #if USE_32BIT_REF + * bl MCC_LoadRefField -> ldr w0, [x1] + * bl MCC_LoadVolatileField -> ldar w0, [x1] + * bl MCC_LoadRefStatic -> ldr w0, [x0] + * bl MCC_LoadVolatileStaticField -> ldar w0, [x0] + * bl MCC_Dummy -> omitted + * #else + * bl MCC_LoadRefField -> ldr x0, [x1] + * bl MCC_LoadVolatileField -> ldar x0, [x1] + * bl MCC_LoadRefStatic -> ldr x0, [x0] + * bl MCC_LoadVolatileStaticField -> ldar x0, [x0] + * bl MCC_Dummy -> omitted + * #endif + * + * if we encounter a tail call optimized read barrier call, + * such as: + * b MCC_LoadRefField + * a return instruction will be added just after the load: + * ldr w0, [x1] + * ret + */ +class InlineReadBarriersAArch64 : public PeepPattern { + public: + explicit InlineReadBarriersAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~InlineReadBarriersAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * mov w1, #34464 + * movk w1, #1, LSL #16 + * sdiv w2, w0, w1 + * ========> + * mov w1, #34464 // may deleted if w1 not live anymore. + * movk w1, #1, LSL #16 // may deleted if w1 not live anymore. + * mov w16, #0x588f + * movk w16, #0x4f8b, LSL #16 + * smull x16, w0, w16 + * lsr x16, x16, #32 + * add x16, x16, w0, SXTW + * lsr x16, x16, #17 + * add x2, x16, x0, LSR #31 + */ +class ReplaceDivToMultiAArch64 : public PeepPattern { + public: + explicit ReplaceDivToMultiAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ReplaceDivToMultiAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * Optimize the following patterns: + * and w0, w0, #1 ====> and w0, w0, #1 + * cmp w0, #1 + * cset w0, EQ + * + * and w0, w0, #1 ====> and w0, w0, #1 + * cmp w0, #0 + * cset w0, NE + * --------------------------------------------------- + * and w0, w0, #imm ====> ubfx w0, w0, pos, size + * cmp w0, #imm + * cset w0, EQ + * + * and w0, w0, #imm ====> ubfx w0, w0, pos, size + * cmp w0, #0 + * cset w0, NE + * conditions: + * imm is pos power of 2 + */ +class AndCmpBranchesToCsetAArch64 : public PeepPattern { + public: + explicit AndCmpBranchesToCsetAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~AndCmpBranchesToCsetAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * We optimize the following pattern in this function: + * cmp w[0-9]*, wzr ====> tbz w[0-9]*, #31, .label + * bge .label + * + * cmp wzr, w[0-9]* ====> tbz w[0-9]*, #31, .label + * ble .label + * + * cmp w[0-9]*,wzr ====> tbnz w[0-9]*, #31, .label + * blt .label + * + * cmp wzr, w[0-9]* ====> tbnz w[0-9]*, #31, .label + * bgt .label + * + * cmp w[0-9]*, #0 ====> tbz w[0-9]*, #31, .label + * bge .label + * + * cmp w[0-9]*, #0 ====> tbnz w[0-9]*, #31, .label + * blt .label + */ +class ZeroCmpBranchesAArch64 : public PeepPattern { + public: + explicit ZeroCmpBranchesAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ZeroCmpBranchesAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * cmp w0, #0 + * cset w1, NE --> mov w1, w0 + * + * cmp w0, #0 + * cset w1, EQ --> eor w1, w0, 1 + * + * cmp w0, #1 + * cset w1, NE --> eor w1, w0, 1 + * + * cmp w0, #1 + * cset w1, EQ --> mov w1, w0 + * + * cmp w0, #0 + * cset w0, NE -->null + * + * cmp w0, #1 + * cset w0, EQ -->null + * + * condition: + * 1. the first operand of cmp instruction must has only one valid bit + * 2. the second operand of cmp instruction must be 0 or 1 + * 3. 
flag register of cmp instruction must not be used later + */ +class CmpCsetAArch64 : public PeepPattern { + public: + explicit CmpCsetAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~CmpCsetAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + + private: + bool CheckOpndDefPoints(Insn &checkInsn, int opndIdx); + const Insn *DefInsnOfOperandInBB(const Insn &startInsn, const Insn &checkInsn, int opndIdx); + bool OpndDefByOneValidBit(const Insn &defInsn); + bool FlagUsedLaterInCurBB(const BB &bb, Insn &startInsn) const; +}; + +/* + * add x0, x1, x0 + * ldr x2, [x0] + * ==> + * ldr x2, [x1, x0] + */ +class ComplexMemOperandAddAArch64 : public PeepPattern { + public: + explicit ComplexMemOperandAddAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ComplexMemOperandAddAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + private: + + bool IsExpandBaseOpnd(const Insn &insn, Insn &prevInsn); +}; + +/* + * cbnz w0, @label + * .... + * mov w0, #0 (elseBB) -->this instruction can be deleted + * + * cbz w0, @label + * .... + * mov w0, #0 (ifBB) -->this instruction can be deleted + * + * condition: + * 1. there are no predefine points of w0 in elseBB(ifBB) + * 2. the first operand of the cbnz insn is the same as the first operand of the mov insn + * 3. w0 is defined by a move of 0 + * 4. all preds of elseBB(ifBB) end with cbnz or cbz + * + * NOTE: if there are multiple preds and there is no define point of w0 in one pred, + * (mov w0, 0) can't be deleted, to avoid a use before def. + */ +class DeleteMovAfterCbzOrCbnzAArch64 : public PeepPattern { + public: + explicit DeleteMovAfterCbzOrCbnzAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) { + cgcfg = cgFunc.GetTheCFG(); + cgcfg->InitInsnVisitor(cgFunc); + } + ~DeleteMovAfterCbzOrCbnzAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + + private: + bool PredBBCheck(BB &bb, bool checkCbz, const Operand &opnd) const; + bool OpndDefByMovZero(const Insn &insn) const; + bool NoPreDefine(Insn &testInsn) const; + void ProcessBBHandle(BB *processBB, const BB &bb, const Insn &insn); + CGCFG *cgcfg; +}; + +/* + * We optimize the following pattern in this function: + * if w0 has only one valid bit + * uxtb w0, w0 + * eor w0, w0, #1 + * cbz w0, .label + * => + * tbnz w0, .label + * && + * if there exists uxtb w0, w0 and the number of w0's valid bits is + * less than 8, eliminate it.
+ */ +class OneHoleBranchesPreAArch64 : public PeepPattern { + public: + explicit OneHoleBranchesPreAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~OneHoleBranchesPreAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + private: + MOperator FindNewMop(const BB &bb, const Insn &insn) const; +}; + +/* + * We optimize the following pattern in this function: + * movz x0, #11544, LSL #0 + * movk x0, #21572, LSL #16 + * movk x0, #8699, LSL #32 + * movk x0, #16393, LSL #48 + * => + * ldr x0, label_of_constant_1 + */ +class LoadFloatPointAArch64 : public PeepPattern { + public: + explicit LoadFloatPointAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~LoadFloatPointAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + private: + bool FindLoadFloatPoint(std::vector &optInsn, Insn &insn); + bool IsPatternMatch(const std::vector &optInsn); +}; + +/* + * Optimize the following patterns: + * orr w21, w0, #0 ====> mov w21, w0 + * orr w21, #0, w0 ====> mov w21, w0 + */ +class ReplaceOrrToMovAArch64 : public PeepPattern { + public: + explicit ReplaceOrrToMovAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ReplaceOrrToMovAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * Optimize the following patterns: + * ldr w0, [x21,#68] ldr w0, [x21,#68] + * mov w1, #-1 mov w1, #-1 + * cmp w0, w1 ====> cmn w0, #-1 + */ +class ReplaceCmpToCmnAArch64 : public PeepPattern { + public: + explicit ReplaceCmpToCmnAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ReplaceCmpToCmnAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * Remove following patterns: + * mov x0, XX + * mov x1, XX + * bl MCC_IncDecRef_NaiveRCFast + */ +class RemoveIncRefAArch64 : public PeepPattern { + public: + explicit RemoveIncRefAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~RemoveIncRefAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * opt long int compare with 0 + * *cmp x0, #0 + * csinv w0, wzr, wzr, GE + * csinc w0, w0, wzr, LE + * cmp w0, #0 + * => + * cmp x0, #0 + */ +class LongIntCompareWithZAArch64 : public PeepPattern { + public: + explicit LongIntCompareWithZAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~LongIntCompareWithZAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + + private: + bool FindLondIntCmpWithZ(std::vector &optInsn, Insn &insn); + bool IsPatternMatch(const std::vector &optInsn); +}; + +/* + * add x0, x1, #:lo12:Ljava_2Futil_2FLocale_241_3B_7C_24SwitchMap_24java_24util_24Locale_24Category + * ldr x2, [x0] + * ==> + * ldr x2, [x1, #:lo12:Ljava_2Futil_2FLocale_241_3B_7C_24SwitchMap_24java_24util_24Locale_24Category] + */ +class ComplexMemOperandAArch64 : public PeepPattern { + public: + explicit ComplexMemOperandAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ComplexMemOperandAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * add x0, x1, x0 + * ldr x2, [x0] + * ==> + * ldr x2, [x1, x0] + */ +class ComplexMemOperandPreAddAArch64 : public PeepPattern { + public: + explicit ComplexMemOperandPreAddAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ComplexMemOperandPreAddAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * add x0, x0, x1, LSL #2 + * ldr x2, [x0] + * ==> + * ldr x2, [x0,x1,LSL #2] + */ +class ComplexMemOperandLSLAArch64 : public PeepPattern { + public: + explicit ComplexMemOperandLSLAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ComplexMemOperandLSLAArch64() 
override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * ldr x0, label_of_constant_1 + * fmov d4, x0 + * ==> + * ldr d4, label_of_constant_1 + */ +class ComplexMemOperandLabelAArch64 : public PeepPattern { + public: + explicit ComplexMemOperandLabelAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ComplexMemOperandLabelAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * mov R0, vreg1 / R0 mov R0, vreg1 + * add vreg2, vreg1, #imm1 add vreg2, vreg1, #imm1 + * mov R1, vreg2 mov R1, vreg2 + * mov R2, vreg3 mov R2, vreg3 + * ... ... + * mov R0, vreg1 + * add vreg4, vreg1, #imm2 -> str vreg5, [vreg1, #imm2] + * mov R1, vreg4 + * mov R2, vreg5 + */ +class WriteFieldCallAArch64 : public PeepPattern { + public: + struct WriteRefFieldParam { + Operand *objOpnd = nullptr; + RegOperand *fieldBaseOpnd = nullptr; + int64 fieldOffset = 0; + Operand *fieldValue = nullptr; + }; + explicit WriteFieldCallAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~WriteFieldCallAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + void Reset() { + hasWriteFieldCall = false; + prevCallInsn = nullptr; + } + + private: + bool hasWriteFieldCall = false; + Insn *prevCallInsn = nullptr; + WriteRefFieldParam firstCallParam; + bool WriteFieldCallOptPatternMatch(const Insn &writeFieldCallInsn, WriteRefFieldParam ¶m, + std::vector ¶mDefInsns); + bool IsWriteRefFieldCallInsn(const Insn &insn); +}; + +/* + * Remove following patterns: + * mov x0, xzr/#0 + * bl MCC_DecRef_NaiveRCFast + */ +class RemoveDecRefAArch64 : public PeepPattern { + public: + explicit RemoveDecRefAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~RemoveDecRefAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * We optimize the following pattern in this function: + * add x1, x1, #16 + * add w2, w10, w10 + * add w2, w2, #1 + * sxtw x2, w2 + * add x1, x1, x2, LSL #3 + * => + * add x1, x1, w10, SXTW #(3+1) combine origin insn 2 (self-added operation) + * add x1, x1, #24 + */ +class ComputationTreeAArch64 : public PeepPattern { + public: + explicit ComputationTreeAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ComputationTreeAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + + private: + bool IsPatternMatch(const std::vector &optInsn) const; + bool FindComputationTree(std::vector &optInsn, Insn &insn); +}; + +/* + * We optimize the following pattern in this function: + * and x1, x1, #imm (is n power of 2) + * cbz/cbnz x1, .label + * => + * and x1, x1, #imm (is n power of 2) + * tbnz/tbz x1, #n, .label + */ +class OneHoleBranchesAArch64 : public PeepPattern { + public: + explicit OneHoleBranchesAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~OneHoleBranchesAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * Replace following pattern: + * mov x1, xzr + * bl MCC_IncDecRef_NaiveRCFast + * => + * bl MCC_IncRef_NaiveRCFast + */ +class ReplaceIncDecWithIncAArch64 : public PeepPattern { + public: + explicit ReplaceIncDecWithIncAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ReplaceIncDecWithIncAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * Optimize the following patterns: + * and w0, w6, #1 ====> tbz w6, 0, .label + * cmp w0, #1 + * bne .label + * + * and w0, w6, #16 ====> tbz w6, 4, .label + * cmp w0, #16 + * bne .label + * + * and w0, w6, #32 ====> tbnz w6, 5, .label + * cmp w0, #32 + * beq .label + * + * and x0, x6, #32 ====> tbz x6, 5, .label + * cmp x0, #0 + 
* beq .label + * + * and x0, x6, #32 ====> tbnz x6, 5, .label + * cmp x0, #0 + * bne .label + */ +class AndCmpBranchesToTbzAArch64 : public PeepPattern { + public: + explicit AndCmpBranchesToTbzAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~AndCmpBranchesToTbzAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +class AArch64PeepHole : public PeepPatternMatch { + public: + AArch64PeepHole(CGFunc &oneCGFunc, MemPool *memPool) : PeepPatternMatch(oneCGFunc, memPool) {} + ~AArch64PeepHole() override = default; + void InitOpts() override; + void Run(BB &bb, Insn &insn) override; + + private: + enum PeepholeOpts : int32 { + kRemoveIdenticalLoadAndStoreOpt = 0, + kRemoveMovingtoSameRegOpt, + kCombineContiLoadAndStoreOpt, + kEliminateSpecifcSXTOpt, + kEliminateSpecifcUXTOpt, + kFmovRegOpt, + kCbnzToCbzOpt, + kContiLDRorSTRToSameMEMOpt, + kRemoveIncDecRefOpt, + kInlineReadBarriersOpt, + kReplaceDivToMultiOpt, + kAndCmpBranchesToCsetOpt, + kZeroCmpBranchesOpt, + kPeepholeOptsNum + }; +}; + +class AArch64PeepHole0 : public PeepPatternMatch { + public: + AArch64PeepHole0(CGFunc &oneCGFunc, MemPool *memPool) : PeepPatternMatch(oneCGFunc, memPool) {} + ~AArch64PeepHole0() override = default; + void InitOpts() override; + void Run(BB &bb, Insn &insn) override; + + private: + enum PeepholeOpts : int32 { + kRemoveIdenticalLoadAndStoreOpt = 0, + kCmpCsetOpt, + kComplexMemOperandOptAdd, + kDeleteMovAfterCbzOrCbnzOpt, + kPeepholeOptsNum + }; +}; + +class AArch64PrePeepHole : public PeepPatternMatch { + public: + AArch64PrePeepHole(CGFunc &oneCGFunc, MemPool *memPool) : PeepPatternMatch(oneCGFunc, memPool) {} + ~AArch64PrePeepHole() override = default; + void InitOpts() override; + void Run(BB &bb, Insn &insn) override; + + private: + enum PeepholeOpts : int32 { + kOneHoleBranchesPreOpt = 0, + kLoadFloatPointOpt, + kReplaceOrrToMovOpt, + kReplaceCmpToCmnOpt, + kRemoveIncRefOpt, + kLongIntCompareWithZOpt, + kComplexMemOperandOpt, + kComplexMemOperandPreOptAdd, + kComplexMemOperandOptLSL, + kComplexMemOperandOptLabel, + kWriteFieldCallOpt, + kPeepholeOptsNum + }; +}; + +class AArch64PrePeepHole1 : public PeepPatternMatch { + public: + AArch64PrePeepHole1(CGFunc &oneCGFunc, MemPool *memPool) : PeepPatternMatch(oneCGFunc, memPool) {} + ~AArch64PrePeepHole1() override = default; + void InitOpts() override; + void Run(BB &bb, Insn &insn) override; + + private: + enum PeepholeOpts : int32 { + kRemoveDecRefOpt = 0, + kComputationTreeOpt, + kOneHoleBranchesOpt, + kReplaceIncDecWithIncOpt, + kAndCmpBranchesToTbzOpt, + kPeepholeOptsNum + }; +}; +} /* namespace maplebe */ +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_PEEP_H */ diff --git a/src/maple_be/include/cg/aarch64/aarch64_reaching.h b/src/maple_be/include/cg/aarch64/aarch64_reaching.h new file mode 100644 index 0000000000000000000000000000000000000000..6497c76d68042ba96e9af84521aa3bee71b98b06 --- /dev/null +++ b/src/maple_be/include/cg/aarch64/aarch64_reaching.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. 
+ * See the Mulan PSL v1 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_REACHING_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_REACHING_H + +#include "reaching.h" + +namespace maplebe { +class AArch64ReachingDefinition : public ReachingDefinition { + public: + AArch64ReachingDefinition(CGFunc &func, MemPool &memPool) : ReachingDefinition(func, memPool) {} + ~AArch64ReachingDefinition() override = default; + std::vector FindRegDefBetweenInsn(uint32 regNO, Insn *startInsn, Insn *endInsn) const final; + std::vector FindMemDefBetweenInsn(uint32 offset, const Insn *startInsn, Insn *endInsn) const final; + bool FindRegUseBetweenInsn(uint32 regNO, Insn *startInsn, Insn *endInsn, InsnSet &useInsnSet) const final; + bool FindMemUseBetweenInsn(uint32 offset, Insn *startInsn, const Insn *endInsn, + InsnSet &useInsnSet) const final; + InsnSet FindDefForRegOpnd(Insn &insn, uint32 indexOrRegNO, bool isRegNO = false) const final; + InsnSet FindDefForMemOpnd(Insn &insn, uint32 indexOrOffset, bool isOffset = false) const final; + InsnSet FindUseForMemOpnd(Insn &insn, uint8 index, bool secondMem = false) const final; + + protected: + void InitStartGen() final; + void InitEhDefine(BB &bb) final; + void InitGenUse(BB &bb, bool firstTime = true) final; + void GenAllCallerSavedRegs(BB &bb) final; + void AddRetPseudoInsn(BB &bb) final; + void AddRetPseudoInsns() final; + bool IsCallerSavedReg(uint32 regNO) const final; + void FindRegDefInBB(uint32 regNO, BB &bb, InsnSet &defInsnSet) const final; + void FindMemDefInBB(uint32 offset, BB &bb, InsnSet &defInsnSet) const final; + void DFSFindDefForRegOpnd(const BB &startBB, uint32 regNO, std::vector &visitedBB, + InsnSet &defInsnSet) const final; + void DFSFindDefForMemOpnd(const BB &startBB, uint32 offset, std::vector &visitedBB, + InsnSet &defInsnSet) const final; + int32 GetStackSize() const final; + + private: + void InitInfoForMemOperand(Insn &insn, Operand &opnd, bool isDef); + inline void InitInfoForListOpnd(const BB &bb, Operand &opnd); + inline void InitInfoForConditionCode(const BB &bb); + inline void InitInfoForRegOpnd(const BB &bb, Operand &opnd, bool isDef); + void InitMemInfoForClearStackCall(Insn &callInsn); + inline bool CallInsnClearDesignateStackRef(const Insn &callInsn, int64 offset) const; + int64 GetEachMemSizeOfPair(MOperator opCode) const; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_REACHING_H */ diff --git a/src/maple_be/include/cg/aarch64/aarch64_schedule.h b/src/maple_be/include/cg/aarch64/aarch64_schedule.h new file mode 100644 index 0000000000000000000000000000000000000000..0b4dddc828e727f03c54b865e67a2a4621daae97 --- /dev/null +++ b/src/maple_be/include/cg/aarch64/aarch64_schedule.h @@ -0,0 +1,175 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_SCHEDULE_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_SCHEDULE_H + +#include "schedule.h" +#include "aarch64_operand.h" + +namespace maplebe { +enum RegisterType : uint8 { + kRegisterUndef, + kRegisterInt, + kRegisterFloat, + kRegisterCc, + kRegisterLast, +}; + +class ScheduleProcessInfo { + public: + explicit ScheduleProcessInfo(uint32 size) { + availableReadyList.reserve(size); + scheduledNodes.reserve(size); + } + + ~ScheduleProcessInfo() = default; + + uint32 GetLastUpdateCycle() const { + return lastUpdateCycle; + } + + void SetLastUpdateCycle(uint32 updateCycle) { + lastUpdateCycle = updateCycle; + } + + uint32 GetCurrCycle() const { + return currCycle; + } + + void IncCurrCycle() { + ++currCycle; + } + + void DecAdvanceCycle() { + advanceCycle--; + } + + uint32 GetAdvanceCycle() const { + return advanceCycle; + } + + void SetAdvanceCycle(uint32 cycle) { + advanceCycle = cycle; + } + + void ClearAvailableReadyList() { + availableReadyList.clear(); + } + + void PushElemIntoAvailableReadyList(DepNode *node) { + availableReadyList.push_back(node); + } + + size_t SizeOfAvailableReadyList() const { + return availableReadyList.size(); + } + + bool AvailableReadyListIsEmpty() const { + return availableReadyList.empty(); + } + + void SetAvailableReadyList(const std::vector &tempReadyList) { + availableReadyList = tempReadyList; + } + + const std::vector &GetAvailableReadyList() const { + return availableReadyList; + } + + const std::vector &GetAvailableReadyList() { + return availableReadyList; + } + + void PushElemIntoScheduledNodes(DepNode *node) { + node->SetState(kScheduled); + node->SetSchedCycle(currCycle); + node->OccupyUnits(); + scheduledNodes.push_back(node); + } + + bool IsFirstSeparator() const { + return isFirstSeparator; + } + + void ResetIsFirstSeparator() { + isFirstSeparator = false; + } + + size_t SizeOfScheduledNodes() const { + return scheduledNodes.size(); + } + + const std::vector &GetScheduledNodes() const { + return scheduledNodes; + } + + private: + std::vector availableReadyList; + std::vector scheduledNodes; + uint32 lastUpdateCycle = 0; + uint32 currCycle = 0; + uint32 advanceCycle = 0; + bool isFirstSeparator = true; +}; + +class AArch64Schedule : public Schedule { + public: + AArch64Schedule(CGFunc &func, MemPool &memPool, LiveAnalysis &live, const std::string &phaseName) + : Schedule(func, memPool, live, phaseName) {} + ~AArch64Schedule() override = default; + protected: + void DumpDepGraph(const MapleVector &nodes) const; + void DumpScheduleResult(const MapleVector &nodes, SimulateType type) const; + void GenerateDot(const BB &bb, const MapleVector &nodes) const; + + private: + void Init() override; + void MemoryAccessPairOpt() override; + void ClinitPairOpt() override; + void RegPressureScheduling(BB &bb, MapleVector &nd) override; + uint32 DoSchedule() override; + uint32 DoBruteForceSchedule() override; + uint32 SimulateOnly() override; + void UpdateBruteForceSchedCycle() override; + void IterateBruteForce(DepNode &targetNode, MapleVector &readyList, uint32 currCycle, + MapleVector &scheduledNodes, uint32 &maxCycleCount, + MapleVector &optimizedScheduledNodes) override; + void FindAndCombineMemoryAccessPair(const std::vector &readyList) override; + bool CanCombine(const Insn &insn) const override; + void ListScheduling(bool beforeRA) override; + void BruteForceScheduling(const BB &bb); + void SimulateScheduling(const BB &bb); + void FinalizeScheduling(BB &bb, const DepAnalysis &depAnalysis) override; + uint32 
ComputeEstart(uint32 cycle) override; + void ComputeLstart(uint32 maxEstart) override; + void UpdateELStartsOnCycle(uint32 cycle) override; + void RandomTest() override; + void EraseNodeFromReadyList(const DepNode &target) override; + void EraseNodeFromNodeList(const DepNode &target, MapleVector &readyList) override; + uint32 GetNextSepIndex() const override; + void CountUnitKind(const DepNode &depNode, uint32 array[], const uint32 arraySize) const override; + static bool IfUseUnitKind(const DepNode &depNode, uint32 index); + void UpdateReadyList(DepNode &targetNode, MapleVector &readyList, bool updateEStart) override; + void UpdateScheduleProcessInfo(ScheduleProcessInfo &info); + bool CheckSchedulable(ScheduleProcessInfo &info) const; + void SelectNode(ScheduleProcessInfo &scheduleInfo); + static void DumpDebugInfo(const ScheduleProcessInfo &info); + static bool CompareDepNode(const DepNode &node1, const DepNode &node2); + void CalculateMaxUnitKindCount(ScheduleProcessInfo &scheduleInfo); + static uint32 maxUnitIndex; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_SCHEDULE_H */ diff --git a/src/maple_be/include/cg/aarch64/aarch64_strldr.h b/src/maple_be/include/cg/aarch64/aarch64_strldr.h new file mode 100644 index 0000000000000000000000000000000000000000..5cd052f195627d3354436a4bb3eec0b60f9edf8f --- /dev/null +++ b/src/maple_be/include/cg/aarch64/aarch64_strldr.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_STRLDR_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_STRLDR_H + +#include "strldr.h" +#include "aarch64_reaching.h" + +namespace maplebe { +using namespace maple; + +class AArch64StoreLoadOpt : public StoreLoadOpt { + public: + AArch64StoreLoadOpt(CGFunc &func, MemPool &memPool) + : StoreLoadOpt(func, memPool), localAlloc(&memPool), str2MovMap(localAlloc.Adapter()) {} + ~AArch64StoreLoadOpt() override = default; + void Run() final; + void DoStoreLoadOpt(); + void DoLoadZeroToMoveTransfer(const Insn&, short, const InsnSet&) const; + void DoLoadToMoveTransfer(Insn&, short, short, const InsnSet&); + bool CheckStoreOpCode(MOperator opCode) const; + private: + void ProcessStrPair(Insn &insn); + void ProcessStr(Insn &insn); + void GenerateMoveLiveInsn(RegOperand &resRegOpnd, RegOperand &srcRegOpnd, + Insn &ldrInsn, Insn &strInsn, short memSeq); + void GenerateMoveDeadInsn(RegOperand &resRegOpnd, RegOperand &srcRegOpnd, + Insn &ldrInsn, Insn &strInsn, short memSeq); + MapleAllocator localAlloc; + /* the max number of mov insn to optimize. 
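The ScheduleProcessInfo bookkeeping above (current cycle, advance counter, available ready list, scheduled list) is what drives AArch64Schedule's list scheduling. Below is a minimal, self-contained sketch of that control flow; the Node struct, its latency field and the selection heuristic are simplified placeholders, not the Maple DepNode interface or the actual DoSchedule implementation.

// Illustrative sketch only: a cycle-driven list scheduler using ScheduleProcessInfo-style bookkeeping.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

struct Node {
  int id;
  uint32_t earliestStart;  // cycle at which all predecessors have delivered results
  bool scheduled;
};

int main() {
  std::vector<Node> nodes = {{0, 0, false}, {1, 0, false}, {2, 2, false}, {3, 3, false}};
  std::vector<Node*> issued;      // plays the role of scheduledNodes
  uint32_t currCycle = 0;

  while (issued.size() < nodes.size()) {
    // Collect nodes whose operands are available at the current cycle (the "available ready list").
    std::vector<Node*> available;
    for (auto &n : nodes) {
      if (!n.scheduled && n.earliestStart <= currCycle) {
        available.push_back(&n);
      }
    }
    if (available.empty()) {
      ++currCycle;                // nothing can issue this cycle: advance and retry
      continue;
    }
    // Trivial heuristic: issue the node with the smallest earliest start.
    Node *best = *std::min_element(available.begin(), available.end(),
        [](const Node *a, const Node *b) { return a->earliestStart < b->earliestStart; });
    best->scheduled = true;
    issued.push_back(best);
    std::cout << "cycle " << currCycle << ": issue node " << best->id << '\n';
    ++currCycle;
  }
  return 0;
}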
*/ + static constexpr uint8 kMaxMovNum = 2; + MapleMap str2MovMap; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_STRLDR_H */ \ No newline at end of file diff --git a/src/maple_be/include/cg/cfgo.h b/src/maple_be/include/cg/cfgo.h new file mode 100644 index 0000000000000000000000000000000000000000..abe8dbabdb31ae0870619f06aa020710bad74644 --- /dev/null +++ b/src/maple_be/include/cg/cfgo.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_CFGO_H +#define MAPLEBE_INCLUDE_CG_CFGO_H +#include "cg_cfg.h" +#include "optimize_common.h" + +namespace maplebe { +class ChainingPattern : public OptimizationPattern { + public: + explicit ChainingPattern(CGFunc &func) : OptimizationPattern(func) { + patternName = "BB Chaining"; + dotColor = kCfgoChaining; + } + + ~ChainingPattern() override = default; + bool Optimize(BB &curBB) override; + + protected: + bool NoInsnBetween(const BB &from, const BB &to) const; + bool DoSameThing(const BB &bb1, const Insn &last1, const BB &bb2, const Insn &last2) const; + bool MergeFallthuBB(BB &curBB); + bool MergeGotoBB(BB &curBB, BB &sucBB); + bool MoveSuccBBAsCurBBNext(BB &curBB, BB &sucBB); + bool RemoveGotoInsn(BB &curBB, BB &sucBB); + bool ClearCurBBAndResetTargetBB(BB &curBB, BB &sucBB); +}; + +class SequentialJumpPattern : public OptimizationPattern { + public: + explicit SequentialJumpPattern(CGFunc &func) : OptimizationPattern(func) { + patternName = "Sequential Jump"; + dotColor = kCfgoSj; + } + + ~SequentialJumpPattern() override = default; + bool Optimize(BB &curBB) override; + + protected: + void SkipSucBB(BB &curBB, BB &sucBB); +}; + +class FlipBRPattern : public OptimizationPattern { + public: + explicit FlipBRPattern(CGFunc &func) : OptimizationPattern(func) { + patternName = "Condition Flip"; + dotColor = kCfgoFlipCond; + } + + ~FlipBRPattern() override = default; + bool Optimize(BB &curBB) override; + + protected: + void RelocateThrowBB(BB &curBB); +}; + +/* This class represents the scenario that the BB is unreachable. */ +class UnreachBBPattern : public OptimizationPattern { + public: + explicit UnreachBBPattern(CGFunc &func) : OptimizationPattern(func) { + patternName = "Unreachable BB"; + dotColor = kCfgoUnreach; + func.GetTheCFG()->FindAndMarkUnreachable(*cgFunc); + } + + ~UnreachBBPattern() override = default; + bool Optimize(BB &curBB) override; +}; + +/* + * This class represents the scenario that a common jump BB can be duplicated + * to one of its another predecessor. + */ +class DuplicateBBPattern : public OptimizationPattern { + public: + explicit DuplicateBBPattern(CGFunc &func) : OptimizationPattern(func) { + patternName = "Duplicate BB"; + dotColor = kCfgoDup; + } + + ~DuplicateBBPattern() override = default; + bool Optimize(BB &curBB) override; + + private: + static constexpr int kThreshold = 10; +}; + +/* + * This class represents the scenario that a BB contains nothing. 
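The AArch64StoreLoadOpt header earlier in this patch forwards a stored register into a later load of the same stack slot. The sketch below shows the idea on a toy instruction list; the Inst struct and the linear scan are stand-ins, since the real pass queries reaching-definition results rather than scanning instructions directly.

// Illustrative sketch of store-to-load forwarding on a toy instruction list.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

enum class Op { kStr, kLdr, kMov, kOther };

struct Inst {
  Op op;
  std::string dst;    // register written (or the stored register for kStr)
  int64_t memOffset;  // stack offset for kStr/kLdr, unused otherwise
};

int main() {
  std::vector<Inst> code = {
    {Op::kStr, "x1", 8},    // str x1, [sp, #8]
    {Op::kOther, "x3", 0},  // unrelated instruction
    {Op::kLdr, "x2", 8},    // ldr x2, [sp, #8]  -> can become mov x2, x1
  };
  for (size_t i = 0; i < code.size(); ++i) {
    if (code[i].op != Op::kStr) {
      continue;
    }
    for (size_t j = i + 1; j < code.size(); ++j) {
      // Stop if the stored register or the memory slot is redefined in between.
      if (code[j].dst == code[i].dst ||
          (code[j].op == Op::kStr && code[j].memOffset == code[i].memOffset)) {
        break;
      }
      if (code[j].op == Op::kLdr && code[j].memOffset == code[i].memOffset) {
        std::cout << "ldr from [sp, #" << code[j].memOffset << "] replaced by mov "
                  << code[j].dst << ", " << code[i].dst << '\n';
        code[j] = {Op::kMov, code[j].dst, 0};
      }
    }
  }
  return 0;
}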
+ */ +class EmptyBBPattern : public OptimizationPattern { + public: + explicit EmptyBBPattern(CGFunc &func) : OptimizationPattern(func) { + patternName = "Empty BB"; + dotColor = kCfgoEmpty; + } + + ~EmptyBBPattern() override = default; + bool Optimize(BB &curBB) override; +}; + +class CFGOptimizer : public Optimizer { + public: + CFGOptimizer(CGFunc &func, MemPool &memPool) : Optimizer(func, memPool) { + name = "CFGO"; + } + + ~CFGOptimizer() override = default; + void InitOptimizePatterns() override; +}; + +CGFUNCPHASE_CANSKIP(CgDoCfgo, "cfgo") +CGFUNCPHASE_CANSKIP(CgDoPostCfgo, "postcfgo") +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_CFGO_H */ \ No newline at end of file diff --git a/src/maple_be/include/cg/cg_cfg.h b/src/maple_be/include/cg/cg_cfg.h index b5be681a6f0e4427f5c26050005cbdbcd69a55f1..9cedb0e607cd458e58e317fa79338312c2d53735 100644 --- a/src/maple_be/include/cg/cg_cfg.h +++ b/src/maple_be/include/cg/cg_cfg.h @@ -18,6 +18,52 @@ #include "cgbb.h" namespace maplebe { +class InsnVisitor { + public: + explicit InsnVisitor(CGFunc &func) : cgFunc(&func) {} + + virtual ~InsnVisitor() = default; + CGFunc *GetCGFunc() const { + return cgFunc; + } + + /* + * Precondition: + * The last instruction in bb is either conditional or unconditional jump. + * + * The jump target of bb is modified to the location specified by targetLabel. + */ + virtual void ModifyJumpTarget(LabelIdx targetLabel, BB &bb) = 0; + + /* + * Precondition: + * The last instruction in bb is either conditional or unconditional jump. + * + * The jump target of bb is modified to the location specified by targetOperand. + */ + virtual void ModifyJumpTarget(Operand &targetOperand, BB &bb) = 0; + + /* + * Precondition: + * The last instruction in bb is either a conditional or an unconditional jump. + * The last instruction in newTarget is an unconditional jump. + * + * The jump target of bb is modified to newTarget's jump target. + */ + virtual void ModifyJumpTarget(BB &newTarget, BB &bb) = 0; + /* Check if it requires to add extra gotos when relocate bb */ + virtual MOperator FlipConditionOp(MOperator flippedOp, int &targetIdx) = 0; + virtual Insn *CloneInsn(Insn &originalInsn) = 0; + /* Create a new virtual register operand which has the same type and size as the given one. */ + virtual RegOperand *CreateVregFromReg(const RegOperand ®) = 0; + virtual LabelIdx GetJumpLabel(const Insn &insn) const = 0; + virtual bool IsCompareInsn(const Insn &insn) const = 0; + virtual bool IsCompareAndBranchInsn(const Insn &insn) const = 0; + + private: + CGFunc *cgFunc; +}; /* class InsnVisitor; */ + class CGCFG { public: explicit CGCFG(CGFunc &cgFunc) : cgFunc(&cgFunc) {} @@ -25,6 +71,12 @@ class CGCFG { ~CGCFG() = default; void BuildCFG(); + + void InitInsnVisitor(CGFunc &func); + InsnVisitor *GetInsnModifier() const { + return insnVisitor; + } + static bool AreCommentAllPreds(const BB &bb); bool CanMerge(const BB &merger, const BB &mergee) const; bool BBJudge(const BB &first, const BB &second) const; @@ -33,9 +85,24 @@ class CGCFG { * predecessors should be modified accordingly. */ static void MergeBB(BB &merger, BB &mergee, CGFunc &func); + + /* + * Remove a BB from its position in the CFG. + * Prev, next, preds and sucs are all modified accordingly. 
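The cfgo.h patterns above each take one basic block and report whether they changed it; SequentialJumpPattern, for example, skips blocks that do nothing but forward to another jump. A minimal sketch of that retargeting on a toy CFG, with a hypothetical BasicBlock struct instead of the Maple BB class:

// Illustrative sketch of the "Sequential Jump" retargeting idea.
#include <iostream>
#include <vector>

struct BasicBlock {
  int id;
  int jumpTarget;  // index of the block this block jumps to, -1 for fallthrough
  bool onlyGoto;   // true if the block contains a single unconditional goto
};

int main() {
  std::vector<BasicBlock> cfg = {
    {0, 1, false},   // BB0: ... ; goto BB1
    {1, 2, true},    // BB1: goto BB2          (trampoline, can be skipped)
    {2, -1, false},  // BB2: real work
  };
  for (auto &bb : cfg) {
    // Follow at most a few trampolines to stay clear of goto cycles.
    for (int hops = 0; hops < 4; ++hops) {
      if (bb.jumpTarget == -1 || !cfg[bb.jumpTarget].onlyGoto) {
        break;
      }
      int finalTarget = cfg[bb.jumpTarget].jumpTarget;
      std::cout << "BB" << bb.id << ": retarget jump from BB" << bb.jumpTarget
                << " to BB" << finalTarget << '\n';
      bb.jumpTarget = finalTarget;
    }
  }
  return 0;
}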
+ */ + void RemoveBB(BB &bb, bool isGotoIf = false); + /* Skip the successor of bb, directly jump to bb's successor'ssuccessor */ + void RetargetJump(BB &srcBB, BB &targetBB); + /* Loop up if the given label is in the exception tables in LSDA */ static bool InLSDA(LabelIdx label, const EHFunc &ehFunc); static bool InSwitchTable(LabelIdx label, const CGFunc &func); + + RegOperand *CreateVregFromReg(const RegOperand &pReg); + Insn *CloneInsn(Insn &originalInsn); + static BB *GetTargetSuc(BB &curBB, bool branchOnly = false, bool isGotoIf = false); + bool IsCompareAndBranchInsn(const Insn &insn) const; + Insn *FindLastCondBrInsn(BB &bb) const; static void FindAndMarkUnreachable(CGFunc &func); void FlushUnReachableStatusAndRemoveRelations(BB &curBB, const CGFunc &func) const; @@ -44,8 +111,9 @@ class CGCFG { /* cgcfgvisitor */ private: CGFunc *cgFunc; + static InsnVisitor *insnVisitor; static void MergeBB(BB &merger, BB &mergee); }; /* class CGCFG */ } /* namespace maplebe */ -#endif /* MAPLEBE_INCLUDE_CG_CG_CFG_H */ \ No newline at end of file +#endif /* MAPLEBE_INCLUDE_CG_CG_CFG_H */ diff --git a/src/maple_be/include/cg/cg_option.h b/src/maple_be/include/cg/cg_option.h index d14bc338e34b6ca9b5a56589d1e666cef83ebbbc..05a5e55b8b3c5c12e44ba89a70c84047134fdbfb 100644 --- a/src/maple_be/include/cg/cg_option.h +++ b/src/maple_be/include/cg/cg_option.h @@ -450,6 +450,117 @@ class CGOptions : public MapleDriverOptionBase { timePhases = false; } + static void EnableEBO() { + doEBO = true; + } + + static void DisableEBO() { + doEBO = false; + } + + static bool DoEBO() { + return doEBO; + } + + static void EnableCFGO() { + doCFGO = true; + } + + static void DisableCFGO() { + doCFGO = false; + } + + static bool DoCFGO() { + return doCFGO; + } + + static void EnableICO() { + doICO = true; + } + + static void DisableICO() { + doICO = false; + } + + static bool DoICO() { + return doICO; + } + + static void EnableStoreLoadOpt() { + doStoreLoadOpt = true; + } + + static void DisableStoreLoadOpt() { + doStoreLoadOpt = false; + } + + static bool DoStoreLoadOpt() { + return doStoreLoadOpt; + } + + static void EnableGlobalOpt() { + doGlobalOpt = true; + } + + static void DisableGlobalOpt() { + doGlobalOpt = false; + } + + static bool DoGlobalOpt() { + return doGlobalOpt; + } + + static void EnablePrePeephole() { + doPrePeephole = true; + } + + static void DisablePrePeephole() { + doPrePeephole = false; + } + + static bool DoPrePeephole() { + return doPrePeephole; + } + + static void EnablePeephole() { + doPeephole = true; + } + + static void DisablePeephole() { + doPeephole = false; + } + + static bool DoPeephole() { + return doPeephole; + } + + static void EnableSchedule() { + doSchedule = true; + } + + static void DisableSchedule() { + doSchedule = false; + } + + static bool DoSchedule() { + return doSchedule; + } + static bool DoWriteRefFieldOpt() { + return doWriteRefFieldOpt; + } + + static void EnableDumpOptimizeCommonLog() { + dumpOptimizeCommonLog = true; + } + + static void DisableDumpOptimizeCommonLog() { + dumpOptimizeCommonLog = false; + } + + static bool IsDumpOptimizeCommonLog() { + return dumpOptimizeCommonLog; + } + static void EnableCheckArrayStore() { checkArrayStore = true; } @@ -482,6 +593,18 @@ class CGOptions : public MapleDriverOptionBase { return doPIC; } + static void EnableNoDupBB() { + noDupBB = true; + } + + static void DisableNoDupBB() { + noDupBB = false; + } + + static bool IsNoDupBB() { + return noDupBB; + } + static void EnableNoCalleeCFI() { noCalleeCFI = true; } @@ -578,6 
+701,43 @@ class CGOptions : public MapleDriverOptionBase { return hotFix; } + static void EnableDebugSched() { + debugSched = true; + } + + static void DisableDebugSched() { + debugSched = false; + } + + static bool IsDebugSched() { + return debugSched; + } + + static void EnableDruteForceSched() { + bruteForceSched = true; + } + + static void DisableDruteForceSched() { + bruteForceSched = false; + } + + static bool IsDruteForceSched() { + return bruteForceSched; + } + + static void EnableSimulateSched() { + simulateSched = true; + } + + static void DisableSimulateSched() { + simulateSched = false; + } + + static bool IsSimulateSched() { + return simulateSched; + } + + static void EnableLongCalls() { genLongCalls = true; } @@ -633,21 +793,36 @@ class CGOptions : public MapleDriverOptionBase { static bool dumpBefore; static bool dumpAfter; static bool timePhases; + static bool doEBO; + static bool doCFGO; + static bool doICO; + static bool doStoreLoadOpt; + static bool doGlobalOpt; + static bool doPrePeephole; + static bool doPeephole; + static bool doSchedule; + static bool doWriteRefFieldOpt; + static bool dumpOptimizeCommonLog; static bool checkArrayStore; - static bool exclusiveEH; static bool doPIC; + static bool noDupBB; static bool noCalleeCFI; static bool emitCyclePattern; static bool insertYieldPoint; static bool mapleLinker; static bool printFunction; - static std::string globalVarProfile; static bool nativeOpt; static bool withDwarf; static bool lazyBinding; static bool hotFix; + /* if true dump scheduling information */ + static bool debugSched; + /* if true do BruteForceSchedule */ + static bool bruteForceSched; + /* if true do SimulateSched */ + static bool simulateSched; /* if true generate adrp/ldr/blr */ static bool genLongCalls; static bool gcOnly; diff --git a/src/maple_be/include/cg/cg_phases.def b/src/maple_be/include/cg/cg_phases.def index 21ed5e2f0f965b907bb5c5cca57b0e9df0067ccd..64801d456f00ada544a2abe0e8a8354e9ccaccce 100644 --- a/src/maple_be/include/cg/cg_phases.def +++ b/src/maple_be/include/cg/cg_phases.def @@ -13,15 +13,31 @@ * See the Mulan PSL v1 for more details. 
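The new CGOptions toggles all follow one pattern: a static flag plus EnableX/DisableX/DoX accessors that later phases consult before running. A standalone sketch of that pattern is below; the Options class and the driver loop are illustrative, and only the accessor names DoEBO and DoSchedule are taken from the patch.

// Illustrative sketch of the static enable/disable/query toggle pattern.
#include <iostream>

class Options {
 public:
  static void EnableEBO() { doEBO = true; }
  static void DisableEBO() { doEBO = false; }
  static bool DoEBO() { return doEBO; }

  static void EnableSchedule() { doSchedule = true; }
  static void DisableSchedule() { doSchedule = false; }
  static bool DoSchedule() { return doSchedule; }

 private:
  static bool doEBO;
  static bool doSchedule;
};

bool Options::doEBO = false;
bool Options::doSchedule = false;

int main() {
  // A driver would flip the toggles once while parsing options,
  // then every function-level pass consults them before running.
  Options::EnableEBO();
  Options::DisableSchedule();

  if (Options::DoEBO()) {
    std::cout << "running ebo phase\n";
  }
  if (Options::DoSchedule()) {
    std::cout << "running schedule phase\n";
  } else {
    std::cout << "schedule phase skipped\n";
  }
  return 0;
}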
*/ FUNCAPHASE(kCGFuncPhaseLIVE, CgDoLiveAnalysis) +FUNCAPHASE(kCGFuncPhaseREACHDEF, CgDoReachingDefinition) +FUNCAPHASE(kCGFuncPhaseCLEARRDINFO, CgDoClearRDInfo) FUNCAPHASE(kCGFuncPhaseLOOP, CgDoLoopAnalysis) FUNCTPHASE(kCGFuncPhaseLAYOUTSF, CgDoLayoutSF) FUNCTPHASE(kCGFuncPhaseCREATESELABEL, CgDoCreateLabel) FUNCTPHASE(kCGFuncPhaseBUILDEHFUNC, CgDoBuildEHFunc) FUNCTPHASE(kCGFuncPhaseHANDLEFUNC, CgDoHandleFunc) +FUNCTPHASE(kCGFuncPhaseEBO, CgDoEbo) +FUNCTPHASE(kCGFuncPhaseEBO1, CgDoEbo1) +FUNCTPHASE(kCGFuncPhasePOSTEBO, CgDoPostEbo) +FUNCTPHASE(kCGFuncPhaseCFGO, CgDoCfgo) +FUNCTPHASE(kCGFuncPhasePOSTCFGO, CgDoPostCfgo) +FUNCTPHASE(kCGFuncPhaseICO, CgDoIco) FUNCTPHASE(kCGFuncPhaseREGALLOC, CgDoRegAlloc) FUNCTPHASE(kCGFuncPhaseMOVREGARGS, CgDoMoveRegArgs) FUNCTPHASE(kCGFuncPhaseGENPROEPILOG, CgDoGenProEpiLog) FUNCTPHASE(kCGFuncPhaseOFFADJFPLR, CgDoFPLROffsetAdjustment) +FUNCTPHASE(kCGFuncPhasePREPEEPHOLE, CgDoPrePeepHole) +FUNCTPHASE(kCGFuncPhasePREPEEPHOLE1, CgDoPrePeepHole1) +FUNCTPHASE(kCGFuncPhasePEEPHOLE0, CgDoPeepHole0) +FUNCTPHASE(kCGFuncPhasePEEPHOLE, CgDoPeepHole) +FUNCTPHASE(kCGFuncPhaseSLO, CgDoStoreLoadOpt) +FUNCTPHASE(kCGFuncPhaseGLOBALOPT, CgDoGlobalOpt) +FUNCTPHASE(kCGFuncPhaseSCHEDULE, CgDoScheduling) FUNCTPHASE(kCGFuncPhaseGENCFI, CgDoGenCfi) FUNCTPHASE(kCGFuncPhaseYIELDPOINT, CgYieldPointInsertion) +FUNCTPHASE(kCGFuncPhaseFIXSHORTBRANCH, CgFixShortBranch) FUNCTPHASE(kCGFuncPhaseEMIT, CgDoEmission) diff --git a/src/maple_be/include/cg/cgfunc.h b/src/maple_be/include/cg/cgfunc.h index 4d898fc01da99ab32d656004dca05ce75432d7a9..337b9c86707162e4121b9a53fe2d49a0dbf013ff 100644 --- a/src/maple_be/include/cg/cgfunc.h +++ b/src/maple_be/include/cg/cgfunc.h @@ -22,6 +22,7 @@ #include "cgbb.h" #include "reg_alloc.h" #include "cfi.h" +#include "reaching.h" #include "cg_cfg.h" /* MapleIR headers. */ #include "mir_parser.h" @@ -109,6 +110,17 @@ class CGFunc { return false; } + void SetRD(ReachingDefinition *paramRd) { + reachingDef = paramRd; + } + + bool GetRDStatus() const { + return (reachingDef != nullptr); + } + + ReachingDefinition *GetRD() { + return reachingDef; + } EHFunc *BuildEHFunc(); virtual void GenSaveMethodInfoCode(BB &bb) = 0; @@ -732,6 +744,7 @@ class CGFunc { volReleaseInsn = insn; } + virtual InsnVisitor *NewInsnModifier() = 0; protected: uint32 firstMapleIrVRegNO = 200; /* positioned after physical regs */ @@ -755,6 +768,8 @@ class CGFunc { bool isVolStore = false; uint32 frequency = 0; DebugInfo *debugInfo = nullptr; /* debugging info */ + ReachingDefinition *reachingDef = nullptr; + int32 dbgCallFrameOffset = 0; CG *cg; MIRModule &mirModule; diff --git a/src/maple_be/include/cg/dependence.h b/src/maple_be/include/cg/dependence.h new file mode 100644 index 0000000000000000000000000000000000000000..7a2467d59da1c1ee2855baf90dba366b0d091b28 --- /dev/null +++ b/src/maple_be/include/cg/dependence.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_DEPENDENCE_H +#define MAPLEBE_INCLUDE_CG_DEPENDENCE_H + +#include "deps.h" +#include "cgbb.h" + +namespace maplebe { +using namespace maple; +namespace { +constexpr maple::uint32 kMaxDependenceNum = 100; +}; + + +class DepAnalysis { + public: + DepAnalysis(CGFunc &func, MemPool &memPool, MAD &mad, bool beforeRA) + : cgFunc(func), memPool(memPool), alloc(&memPool), beforeRA(beforeRA), mad(mad), + lastComments(alloc.Adapter()) {} + + virtual ~DepAnalysis() = default; + + virtual void Run(BB &bb, MapleVector &nodes) = 0; + + const MapleVector &GetLastComments() const { + return lastComments; + } + virtual void CombineClinit(DepNode &firstNode, DepNode &secondNode, bool isAcrossSeparator) = 0; + virtual void CombineDependence(DepNode &firstNode, DepNode &secondNode, bool isAcrossSeparator, + bool isMemCombine = false) = 0; + virtual void CombineMemoryAccessPair(DepNode &firstNode, DepNode &secondNode, bool useFirstOffset) = 0; + + virtual const std::string &GetDepTypeName(DepType depType) const = 0; + virtual void DumpDepNode(DepNode &node) const = 0; + virtual void DumpDepLink(DepLink &link, const DepNode *node) const = 0; + + protected: + CGFunc &cgFunc; + MemPool &memPool; + MapleAllocator alloc; + bool beforeRA; + MAD &mad; + MapleVector lastComments; + + virtual void Init(BB &bb, MapleVector &nodes) = 0; + virtual void ClearAllDepData() = 0; + virtual void AnalysisAmbiInsns(BB &bb) = 0; + virtual void AppendRegUseList(Insn &insn, regno_t regNO) = 0; + virtual void AddDependence(DepNode &fromNode, DepNode &toNode, DepType depType) = 0; + virtual void RemoveSelfDeps(Insn &insn) = 0; + virtual void BuildDepsUseReg(Insn &insn, regno_t regNO) = 0; + virtual void BuildDepsDefReg(Insn &insn, regno_t regNO) = 0; + virtual void BuildDepsAmbiInsn(Insn &insn) = 0; + virtual void BuildDepsMayThrowInsn(Insn &insn) = 0; + virtual void BuildDepsUseMem(Insn &insn, MemOperand &memOpnd) = 0; + virtual void BuildDepsDefMem(Insn &insn, MemOperand &memOpnd) = 0; + virtual void BuildDepsMemBar(Insn &insn) = 0; + virtual void BuildDepsSeparator(DepNode &newSepNode, MapleVector &nodes) = 0; + virtual void BuildDepsControlAll(DepNode &depNode, const MapleVector &nodes) = 0; + virtual void BuildDepsAccessStImmMem(Insn &insn, bool isDest) = 0; + virtual void BuildCallerSavedDeps(Insn &insn) = 0; + virtual void BuildDepsBetweenControlRegAndCall(Insn &insn, bool isDest) = 0; + virtual void BuildStackPassArgsDeps(Insn &insn) = 0; + virtual void BuildDepsDirtyStack(Insn &insn) = 0; + virtual void BuildDepsUseStack(Insn &insn) = 0; + virtual void BuildDepsDirtyHeap(Insn &insn) = 0; + virtual DepNode *BuildSeparatorNode() = 0; + virtual bool IfInAmbiRegs(regno_t regNO) const = 0; + virtual bool IsFrameReg(const RegOperand&) const = 0; +}; +} + +#endif /* MAPLEBE_INCLUDE_CG_DEPENDENCE_H */ \ No newline at end of file diff --git a/src/maple_be/include/cg/deps.h b/src/maple_be/include/cg/deps.h new file mode 100644 index 0000000000000000000000000000000000000000..56fa270dec379b9780403fbb073868ee1e7e4366 --- /dev/null +++ b/src/maple_be/include/cg/deps.h @@ -0,0 +1,394 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. 
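DepAnalysis builds edges such as BuildDepsUseReg and BuildDepsDefReg from the def and use sets of each instruction. The sketch below shows the usual classification those edges encode (true, anti, output) on a hypothetical Inst struct; it is not the AArch64 dependence builder itself.

// Illustrative sketch: classifying the dependence of a later instruction on an earlier one.
#include <iostream>
#include <set>
#include <string>

struct Inst {
  std::set<int> defs;  // registers written
  std::set<int> uses;  // registers read
};

static bool Intersects(const std::set<int> &a, const std::set<int> &b) {
  for (int r : a) {
    if (b.count(r) > 0) {
      return true;
    }
  }
  return false;
}

// Returns the dependence of "later" on "earlier" in program order.
static std::string Classify(const Inst &earlier, const Inst &later) {
  if (Intersects(earlier.defs, later.uses)) {
    return "true (read-after-write)";
  }
  if (Intersects(earlier.uses, later.defs)) {
    return "anti (write-after-read)";
  }
  if (Intersects(earlier.defs, later.defs)) {
    return "output (write-after-write)";
  }
  return "none";
}

int main() {
  Inst def = {{1}, {2, 3}};  // r1 = r2 + r3
  Inst use = {{4}, {1, 5}};  // r4 = r1 + r5
  std::cout << Classify(def, use) << '\n';  // prints: true (read-after-write)
  return 0;
}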
+ * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_DEPS_H +#define MAPLEBE_INCLUDE_CG_DEPS_H + +#include "mad.h" +#include "pressure.h" +#include +namespace maplebe { +#define PRINT_STR_VAL(STR, VAL) \ + LogInfo::MapleLogger() << std::left << std::setw(12) << STR << VAL << " | "; +#define PRINT_VAL(VAL) \ + LogInfo::MapleLogger() << std::left << std::setw(12) << VAL << " | "; + +enum DepType : uint8 { + kDependenceTypeTrue, + kDependenceTypeOutput, + kDependenceTypeAnti, + kDependenceTypeControl, + kDependenceTypeMembar, + kDependenceTypeThrow, + kDependenceTypeSeparator, + kDependenceTypeNone +}; + +const std::array kDepTypeName = { + "true-dep", + "output-dep", + "anti-dep", + "control-dep", + "membar-dep", + "throw-dep", + "separator-dep", + "none-dep", +}; + +enum NodeType : uint8 { + kNodeTypeNormal, + kNodeTypeSeparator, + kNodeTypeEmpty +}; + +enum ScheduleState : uint8 { + kNormal, + kReady, + kScheduled, +}; + +class DepNode; + +class DepLink { + public: + DepLink(DepNode &fromNode, DepNode &toNode, DepType typ) : from(fromNode), to(toNode), depType(typ), latency(0) {} + virtual ~DepLink() = default; + + DepNode &GetFrom() const { + return from; + } + DepNode &GetTo() const { + return to; + } + void SetDepType(DepType dType) { + depType = dType; + } + DepType GetDepType() const { + return depType; + } + void SetLatency(uint32 lat) { + latency = lat; + } + uint32 GetLatency() const { + return latency; + } + + private: + DepNode &from; + DepNode &to; + DepType depType; + uint32 latency; +}; + +class DepNode { + public: + bool CanBeScheduled() const; + void OccupyUnits(); + uint32 GetUnitKind() const; + + DepNode(Insn &insn, MapleAllocator &alloc) + : insn(&insn), units(nullptr), reservation(nullptr), unitNum(0), + eStart(0), lStart(0), visit(0), type(kNodeTypeNormal), state(kNormal), index(0), simulateCycle(0), + schedCycle(0), bruteForceSchedCycle(0), validPredsSize(0), validSuccsSize(0), + preds(alloc.Adapter()), succs(alloc.Adapter()), comments(alloc.Adapter()), + cfiInsns(alloc.Adapter()), clinitInsns(alloc.Adapter()), locInsn(nullptr), regPressure(nullptr) {} + + DepNode(Insn &insn, MapleAllocator &alloc, Unit * const *unit, uint32 num, Reservation &rev) + : insn(&insn), units(unit), + reservation(&rev), unitNum(num), eStart(0), lStart(0), visit(0), type(kNodeTypeNormal), state(kNormal), + index(0), simulateCycle(0), schedCycle(0), bruteForceSchedCycle(0), validPredsSize(0), validSuccsSize(0), + preds(alloc.Adapter()), succs(alloc.Adapter()), comments(alloc.Adapter()), cfiInsns(alloc.Adapter()), + clinitInsns(alloc.Adapter()), locInsn(nullptr), regPressure(nullptr) {} + virtual ~DepNode() = default; + + Insn *GetInsn() const { + return insn; + } + void SetInsn(Insn &rvInsn) { + insn = &rvInsn; + } + void SetUnits(Unit * const *unit) { + units = unit; + } + const Unit *GetUnitByIndex(uint32 idx) const { + ASSERT(index < unitNum, "out of units"); + return units[idx]; + } + Reservation *GetReservation() const { + return reservation; + } + void SetReservation(Reservation &rev) { + reservation = &rev; + } + uint32 GetUnitNum() const { + return unitNum; + } + void SetUnitNum(uint32 num) { + unitNum = num; + } + uint32 GetEStart() const { + return eStart; + 
} + void SetEStart(uint32 start) { + eStart = start; + } + uint32 GetLStart() const { + return lStart; + } + void SetLStart(uint32 start) { + lStart = start; + } + uint32 GetVisit() const { + return visit; + } + void SetVisit(uint32 visitVal) { + visit = visitVal; + } + void IncreaseVisit() { + ++visit; + } + NodeType GetType() const { + return type; + } + void SetType(NodeType nodeType) { + type = nodeType; + } + ScheduleState GetState() const { + return state; + } + void SetState(ScheduleState scheduleState) { + state = scheduleState; + } + uint32 GetIndex() const { + return index; + } + void SetIndex(uint32 idx) { + index = idx; + } + void SetSchedCycle(uint32 cycle) { + schedCycle = cycle; + } + uint32 GetSchedCycle() const { + return schedCycle; + } + void SetSimulateCycle(uint32 cycle) { + simulateCycle = cycle; + } + uint32 GetSimulateCycle() const { + return simulateCycle; + } + void SetBruteForceSchedCycle(uint32 cycle) { + bruteForceSchedCycle = cycle; + } + uint32 GetBruteForceSchedCycle() const { + return bruteForceSchedCycle; + } + void SetValidPredsSize(uint32 validSize) { + validPredsSize = validSize; + } + uint32 GetValidPredsSize() const { + return validPredsSize; + } + void DescreaseValidPredsSize() { + --validPredsSize; + } + void IncreaseValidPredsSize() { + ++validPredsSize; + } + uint32 GetValidSuccsSize() const { + return validSuccsSize; + } + void SetValidSuccsSize(uint32 size) { + validSuccsSize = size; + } + const MapleVector &GetPreds() const { + return preds; + } + void AddPred(DepLink &depLink) { + preds.push_back(&depLink); + } + void RemovePred() { + preds.pop_back(); + } + const MapleVector &GetSuccs() const{ + return succs; + } + void AddSucc(DepLink &depLink) { + succs.push_back(&depLink); + } + void RemoveSucc() { + succs.pop_back(); + } + const MapleVector &GetComments() const { + return comments; + } + void SetComments(MapleVector com) { + comments = com; + } + void AddComments(Insn &insn) { + comments.push_back(&insn); + } + void ClearComments() { + comments.clear(); + } + const MapleVector &GetCfiInsns() const { + return cfiInsns; + } + void SetCfiInsns(MapleVector insns) { + cfiInsns = insns; + } + void AddCfiInsn(Insn &insn) { + cfiInsns.push_back(&insn); + } + void ClearCfiInsns() { + cfiInsns.clear(); + } + const MapleVector &GetClinitInsns() const { + return clinitInsns; + } + void SetClinitInsns(MapleVector insns) { + clinitInsns = insns; + } + void AddClinitInsn(Insn &insn) { + clinitInsns.push_back(&insn); + } + const RegPressure *GetRegPressure() const { + return regPressure; + } + void SetRegPressure(RegPressure &pressure) { + regPressure = &pressure; + } + void DumpRegPressure() const { + if (regPressure) { + regPressure->DumpRegPressure(); + } + } + void InitPressure() { + regPressure->InitPressure(); + } + const int32 *GetPressure() const { + return regPressure->GetPressure(); + } + void SetPressure(int32 &pressure) { + regPressure->SetPressure(&pressure); + } + void IncPressureByIndex(int32 idx) { + regPressure->IncPressureByIndex(idx); + } + void DecPressureByIndex(int32 idx) { + regPressure->DecPressureByIndex(idx); + } + void AddUseReg(regno_t reg) { + regPressure->AddUseReg(reg); + } + void AddDefReg(regno_t reg) { + regPressure->AddDefReg(reg); + } + void SetRegUses(regno_t regNO, RegList ®List) { + regPressure->SetRegUses(regNO, ®List); + } + int32 GetIncPressure() const { + return regPressure->GetIncPressure(); + } + void SetIncPressure(bool value) { + regPressure->SetIncPressure(value); + } + int32 GetMaxDepth() const { + return 
regPressure->GetMaxDepth(); + } + void SetMaxDepth(int32 value) { + regPressure->SetMaxDepth(value); + } + int32 GetNear() const { + return regPressure->GetNear(); + } + void SetNear(int32 value) { + regPressure->SetNear(value); + } + int32 GetPriority() const { + return regPressure->GetPriority(); + } + void SetPriority(int32 value) { + regPressure->SetPriority(value); + } + const MapleSet &GetUses() const { + return regPressure->GetUses(); + } + const MapleSet &GetDefs() const { + return regPressure->GetDefs(); + } + const MapleMap &GetRegUses() const { + return regPressure->GetRegUses(); + } + + const Insn *GetLocInsn() const { + return locInsn; + } + void SetLocInsn(const Insn &insn) { + locInsn = &insn; + } + + /* printf dep-node's information of scheduling */ + void DumpSchedInfo() const { + PRINT_STR_VAL("estart: ", eStart); + PRINT_STR_VAL("lstart: ", lStart); + PRINT_STR_VAL("visit: ", visit); + PRINT_STR_VAL("state: ", state); + PRINT_STR_VAL("index: ", index); + PRINT_STR_VAL("validPredsSize: ", validPredsSize); + PRINT_STR_VAL("validSuccsSize: ", validSuccsSize); + LogInfo::MapleLogger() << '\n'; + } + + private: + Insn *insn; + Unit * const *units; + Reservation *reservation; + uint32 unitNum; + uint32 eStart; + uint32 lStart; + uint32 visit; + NodeType type; + ScheduleState state; + uint32 index; + uint32 simulateCycle; + uint32 schedCycle; + uint32 bruteForceSchedCycle; + + /* For scheduling, denotes unscheduled preds/succs number. */ + uint32 validPredsSize; + uint32 validSuccsSize; + + /* Dependence links. */ + MapleVector preds; + MapleVector succs; + + /* Non-machine instructions prior to insn, such as comments. */ + MapleVector comments; + + /* Non-machine instructions which follows insn, such as cfi instructions. */ + MapleVector cfiInsns; + + /* Special instructions which follows insn, such as clinit instructions. */ + MapleVector clinitInsns; + + /* loc insn which indicate insn location in source file */ + const Insn *locInsn; + + /* For register pressure analysis */ + RegPressure *regPressure; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_DEPS_H */ \ No newline at end of file diff --git a/src/maple_be/include/cg/ebo.h b/src/maple_be/include/cg/ebo.h new file mode 100644 index 0000000000000000000000000000000000000000..19d81d910d53d3674628ace71443e7ddc6c9dc64 --- /dev/null +++ b/src/maple_be/include/cg/ebo.h @@ -0,0 +1,238 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
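DepNode carries eStart and lStart, the earliest and latest cycles at which a node can issue given edge latencies. The following sketch computes an eStart-style value by relaxing the edges of a hard-coded toy DAG in topological order; the graph shape and latencies are invented for illustration.

// Illustrative sketch: earliest-start (critical path) computation over a dependence DAG.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

struct Edge {
  int from;
  int to;
  uint32_t latency;
};

int main() {
  const int numNodes = 4;
  // 0 -> 1 (lat 2), 0 -> 2 (lat 1), 1 -> 3 (lat 3), 2 -> 3 (lat 1); node order 0..3 is topological.
  std::vector<Edge> edges = {{0, 1, 2}, {0, 2, 1}, {1, 3, 3}, {2, 3, 1}};
  std::vector<uint32_t> eStart(numNodes, 0);

  for (int node = 0; node < numNodes; ++node) {  // nodes are visited in topological order
    for (const Edge &e : edges) {
      if (e.from == node) {
        eStart[e.to] = std::max(eStart[e.to], eStart[node] + e.latency);
      }
    }
  }
  for (int node = 0; node < numNodes; ++node) {
    std::cout << "node " << node << ": eStart = " << eStart[node] << '\n';
  }
  // node 3 ends up with eStart 5 (path 0 -> 1 -> 3), the critical path length.
  return 0;
}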
+ */ +#ifndef MAPLEBE_INCLUDE_CG_EBO_H +#define MAPLEBE_INCLUDE_CG_EBO_H + +#include "cg_phase.h" +#include "cgbb.h" +#include "live.h" + +namespace maplebe { +namespace { +constexpr uint32 kEboDefaultMemHash = 0; +constexpr uint32 kEboNoAliasMemHash = 1; +constexpr uint32 kEboSpillMemHash = 2; +constexpr uint32 kEboCopyInsnHash = 3; +constexpr uint32 kEboReservedInsnHash = 4; +constexpr uint32 kEboMaxExpInsnHash = 1024; +constexpr uint32 kEboMaxOpndHash = 521; +constexpr uint32 kEboMaxInsnHash = kEboReservedInsnHash + kEboMaxExpInsnHash; +}; + +#define EBO_EXP_INSN_HASH(val) ((kEboMaxExpInsnHash - 1ULL) & (static_cast(val) >> 6)) + +/* forward decls */ +class InsnInfo; + +struct OpndInfo { + explicit OpndInfo(Operand &opnd) : opnd(&opnd) {} + + virtual ~OpndInfo() = default; + + int32 hashVal = 0; /* Mem operand is placed in hash table, this is the hashVal of it, and otherwise -1. */ + Operand *opnd; /* Operand */ + Operand *replacementOpnd = nullptr; /* Rename opnd with this new name. */ + OpndInfo *replacementInfo = nullptr; /* Rename opnd with this info. */ + BB *bb = nullptr; /* The Definining bb. */ + Insn *insn = nullptr; /* The Defining insn. */ + InsnInfo *insnInfo = nullptr; + bool redefinedInBB = false; /* A following definition exisit in bb. */ + bool redefined = false; /* A following definition exisit. */ + OpndInfo *same = nullptr; /* Other definitions of the same operand. */ + OpndInfo *prev = nullptr; + OpndInfo *next = nullptr; + OpndInfo *hashNext = nullptr; + int32 refCount = 0; /* Number of references to the operand. */ +}; + +struct MemOpndInfo : public OpndInfo { + explicit MemOpndInfo(Operand &opnd) : OpndInfo(opnd) {} + + ~MemOpndInfo() override = default; + + OpndInfo *GetBaseInfo() const { + return base; + } + + OpndInfo *GetOffsetInfo() const{ + return offset; + } + + void SetBaseInfo(OpndInfo &baseInfo) { + base = &baseInfo; + } + + void SetOffsetInfo(OpndInfo &offInfo) { + offset = &offInfo; + } + + private: + OpndInfo *base = nullptr; + OpndInfo *offset = nullptr; +}; + +class InsnInfo { + public: + InsnInfo(MemPool &memPool, Insn &insn) + : alloc(&memPool), bb(insn.GetBB()), insn(&insn), result(alloc.Adapter()), + origOpnd(alloc.Adapter()), optimalOpnd(alloc.Adapter()) {} + + virtual ~InsnInfo() = default; + MapleAllocator alloc; + uint32 hashIndex = 0; + bool mustNotBeRemoved = false; /* Some condition requires this insn. */ + BB *bb; /* The defining bb. */ + Insn *insn; /* The defining insn. */ + InsnInfo *same = nullptr; /* Other insns with the same hash value. */ + InsnInfo *prev = nullptr; + InsnInfo *next = nullptr; + MapleVector result; /* Result array. 
*/ + MapleVector origOpnd; + MapleVector optimalOpnd; +}; + +class Ebo { + public: + Ebo(CGFunc &func, MemPool &memPool, LiveAnalysis *live, bool before, const std::string &phase) + : cgFunc(&func), + beforeRegAlloc(before), + phaseName(phase), + live(live), + eboMp(&memPool), + eboAllocator(&memPool), + visitedBBs(eboAllocator.Adapter()), + vRegInfo(std::less(), eboAllocator.Adapter()), + exprInfoTable(eboAllocator.Adapter()), + insnInfoTable(eboAllocator.Adapter()) {} + + virtual ~Ebo() = default; + + MemOpndInfo *GetMemInfo(InsnInfo &insnInfo); + void SetInsnInfo(uint32 hashVal, InsnInfo &info) { + ASSERT(hashVal < insnInfoTable.size(), "hashVal out of insnInfoTable range"); + insnInfoTable.at(hashVal) = &info; + } + + void IncRef(OpndInfo &info) const { + ++info.refCount; + } + + void DecRef(OpndInfo &info) const { + --info.refCount; + } + + void EnlargeSpaceForLA(Insn &csetInsn); + bool IsSaveReg(const Operand &opnd) const; + bool IsFrameReg(Operand &opnd) const; + bool OperandEqual(const Operand &op1, const Operand &op2) const; + Operand *GetZeroOpnd(uint32 size) const; + bool IsPhysicalReg(const Operand &opnd) const; + bool HasAssignedReg(const Operand &opnd) const; + bool IsOfSameClass(const Operand &op0, const Operand &op1) const; + bool OpndAvailableInBB(const BB &bb, OpndInfo &info); + bool IsNotVisited(const BB &bb) { + return !visitedBBs.at(bb.GetId()); + }; + + void SetBBVisited(const BB &bb) { + visitedBBs.at(bb.GetId()) = true; + }; + + void UpdateOpndInfo(const Operand &opnd, OpndInfo &opndInfo, OpndInfo *newInfo, int32 hashVal); + void SetOpndInfo(const Operand &opnd, OpndInfo *opndInfo, int32 hashVal); + bool RegistersIdentical(const Operand &opnd0, const Operand &opnd1) const; + OpndInfo *GetOpndInfo(const Operand &opnd, int32 hashVal) const; + OpndInfo *GetNewOpndInfo(BB &bb, Insn *insn, Operand &opnd, int32 hashVal); + OpndInfo *OperandInfoUse(BB ¤tBB, Operand &localOpnd); + InsnInfo *GetNewInsnInfo(Insn &insn); + int32 ComputeOpndHash(const Operand &opnd) const; + uint32 ComputeHashVal(const Insn &insn, const MapleVector &opndInfo) const; + void MarkOpndLiveIntoBB(const Operand &opnd, BB &intoBB, BB &outOfBB) const; + bool LiveOutOfBB(const Operand &opnd, const BB &bb) const; + void RemoveInsn(InsnInfo &insnInfo); + void RemoveUses(uint32 opndNum, const MapleVector &origInfo); + void HashInsn(Insn &insn, const MapleVector &origInfo, const MapleVector &opndInfo); + void BuildAllInfo(BB &bb); + InsnInfo *LocateInsnInfo(const OpndInfo &info); + void RemoveUnusedInsns(BB &bb, bool normal); + void UpdateNextInfo(const OpndInfo &opndInfo); + void BackupOpndInfoList(OpndInfo *saveLast); + void BackupInsnInfoList(InsnInfo *saveLast); + void AddBB2EB(BB &bb); + void EboInit(); + void EboProcessSingleBB(); + void EboProcess(); + void Run(); + std::string PhaseName() const { + return phaseName; + } + + protected: + CGFunc *cgFunc; + bool beforeRegAlloc; /* True if perform Ebo before register allocation. 
*/ + virtual OpndInfo *OperandInfoDef(BB ¤tBB, Insn ¤tInsn, Operand &localOpnd) = 0; + virtual const RegOperand &GetRegOperand(const Operand &opnd) const = 0; + virtual bool IsGlobalNeeded(Insn &insn) const = 0; + virtual bool IsFmov(const Insn &insn) const = 0; + virtual bool SpecialSequence(Insn &insn, const MapleVector &origInfos) = 0; + virtual bool DoConstProp(Insn &insn, uint32 i, Operand &opnd) = 0; + virtual bool DoConstantFold(Insn &insn, const MapleVector &opnds) = 0; + virtual bool ConstantOperand(Insn &insn, const MapleVector &opnds, + const MapleVector &opndInfo) = 0; + virtual int32 GetOffsetVal(const MemOperand &mem) const = 0; + virtual bool OperandEqSpecial(const Operand &op1, const Operand &op2) const = 0; + virtual void BuildCallerSaveRegisters() = 0; + virtual void DefineCallerSaveRegisters(InsnInfo &insnInfo) = 0; + virtual void DefineReturnUseRegister(Insn &insn) = 0; + virtual void DefineCallUseSpecialRegister(Insn &insn) = 0; + virtual void DefineClinitSpecialRegisters(InsnInfo &insnInfo) = 0; + virtual bool IsMovToSIMDVmov(Insn &insn, const Insn &replaceInsn) const = 0; + virtual bool ChangeLdrMop(Insn &insn, const Operand &opnd) const = 0; + virtual bool IsAdd(const Insn &insn) const = 0; + virtual bool IsClinitCheck(const Insn &insn) const = 0; + virtual bool IsLastAndBranch(BB &bb, Insn &insn) const = 0; + virtual bool ResIsNotDefAndUse(Insn &insn) const = 0; + OpndInfo *BuildMemOpndInfo(BB &bb, Insn &insn, Operand &opnd, int32 opndIndex); + OpndInfo *BuildOperandInfo(BB &bb, Insn &insn, Operand &opnd, uint32 opndIndex, MapleVector &origInfos); + bool ForwardPropagateOpnd(Insn &insn, Operand *&opnd, uint32 opndIndex, OpndInfo *&opndInfo, + MapleVector &origInfos); + void SimplifyInsn(Insn &insn, bool &insnReplaced, bool opndsConstant, const MapleVector &opnds, + const MapleVector &opndInfos, const MapleVector &origInfos); + void FindRedundantInsns(BB &bb, Insn *&insn, const Insn *prev, bool insnReplaced, + MapleVector &opnds, MapleVector &opndInfos, + const MapleVector &origInfos); + void PreProcessSpecialInsn(Insn &insn); + + std::string phaseName; + LiveAnalysis *live; + uint32 bbNum = 0; /* bb numbers for an extend block. */ + MemPool *eboMp; + MapleAllocator eboAllocator; + MapleVector visitedBBs; + OpndInfo *firstOpndInfo = nullptr; + OpndInfo *lastOpndInfo = nullptr; + InsnInfo *firstInsnInfo = nullptr; + InsnInfo *lastInsnInfo = nullptr; + MapleMap vRegInfo; + MapleVector exprInfoTable; + MapleVector insnInfoTable; +}; + +CGFUNCPHASE_CANSKIP(CgDoEbo, "ebo") +CGFUNCPHASE_CANSKIP(CgDoEbo1, "ebo1") +CGFUNCPHASE_CANSKIP(CgDoPostEbo, "postebo") +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_EBO_H */ diff --git a/src/maple_be/include/cg/global.h b/src/maple_be/include/cg/global.h new file mode 100644 index 0000000000000000000000000000000000000000..f95fc03434f7a145cd24ae52751094f790d0639d --- /dev/null +++ b/src/maple_be/include/cg/global.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
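Ebo hashes expressions into exprInfoTable so that a later instruction computing the same value can be detected and replaced. The sketch below reduces that idea to value numbering over a toy three-address sequence; the Expr struct and the key layout are assumptions, not the EBO_EXP_INSN_HASH scheme.

// Illustrative sketch: redundancy detection by hashing (opcode, operand value numbers).
#include <iostream>
#include <map>
#include <string>
#include <tuple>
#include <vector>

struct Expr {
  std::string dst;
  std::string op;
  std::string lhs;
  std::string rhs;
};

int main() {
  std::vector<Expr> code = {
    {"t1", "add", "a", "b"},
    {"t2", "mul", "t1", "c"},
    {"t3", "add", "a", "b"},  // same key as the first add -> redundant
  };
  std::map<std::string, int> valueNumber;                          // operand -> value number
  std::map<std::tuple<std::string, int, int>, std::string> seen;   // expr key -> defining temp
  int nextVN = 0;
  auto vnOf = [&](const std::string &name) {
    if (valueNumber.find(name) == valueNumber.end()) {
      valueNumber[name] = nextVN++;
    }
    return valueNumber[name];
  };
  for (const Expr &e : code) {
    auto key = std::make_tuple(e.op, vnOf(e.lhs), vnOf(e.rhs));
    auto it = seen.find(key);
    if (it != seen.end()) {
      std::cout << e.dst << " is redundant, reuse " << it->second << '\n';
      valueNumber[e.dst] = vnOf(it->second);   // dst gets the same value number
    } else {
      seen[key] = e.dst;
      valueNumber[e.dst] = nextVN++;
    }
  }
  return 0;
}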
+ */ +#ifndef MAPLEBE_INCLUDE_CG_GLOBAL_H +#define MAPLEBE_INCLUDE_CG_GLOBAL_H + +#include "cg_phase.h" + +namespace maplebe { +class GlobalOpt { + public: + explicit GlobalOpt(CGFunc &func) : cgFunc(func) {} + virtual ~GlobalOpt() = default; + virtual void Run() {} + std::string PhaseName() const { + return "globalopt"; + } + + protected: + /* if the number of bbs is more than 500 or the number of insns is more than 9000, don't optimize. */ + static constexpr uint32 kMaxBBNum = 500; + static constexpr uint32 kMaxInsnNum = 9000; + CGFunc &cgFunc; +}; + +CGFUNCPHASE_CANSKIP(CgDoGlobalOpt, "globalopt") +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_GLOBAL_H */ \ No newline at end of file diff --git a/src/maple_be/include/cg/ico.h b/src/maple_be/include/cg/ico.h new file mode 100644 index 0000000000000000000000000000000000000000..41a223d4e18d35626b6fdc8ca3d0eb16fdc494e2 --- /dev/null +++ b/src/maple_be/include/cg/ico.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_ICO_H +#define MAPLEBE_INCLUDE_CG_ICO_H +#include "optimize_common.h" +#include "live.h" + +namespace maplebe { +class IfConversionOptimizer : public Optimizer { + public: + IfConversionOptimizer(CGFunc &func, MemPool &memPool) : Optimizer(func, memPool) { + name = "ICO"; + } + + ~IfConversionOptimizer() override = default; +}; + +/* If-Then-Else pattern */ +class ICOPattern : public OptimizationPattern { + public: + explicit ICOPattern(CGFunc &func) : OptimizationPattern(func) { + dotColor = kIcoIte; + patternName = "If-Then-Else"; + } + ~ICOPattern() override = default; + static constexpr int kThreshold = 2; + bool Optimize(BB &curBB) override; + + protected: + virtual bool DoOpt(BB &cmpBB, BB *ifBB, BB *elseBB, BB &joinBB) = 0; + Insn *FindLastCmpInsn(BB &bb) const; +}; + +CGFUNCPHASE_CANSKIP(CgDoIco, "ico") +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_ICO_H */ diff --git a/src/maple_be/include/cg/insn.h b/src/maple_be/include/cg/insn.h index eb24e5462b5702431bbef8f4a47ee6dfc337b40a..1f99e9fec23e7713ad53aa47b10d02c36e476335 100644 --- a/src/maple_be/include/cg/insn.h +++ b/src/maple_be/include/cg/insn.h @@ -33,6 +33,8 @@ using MOperator = uint32; class BB; class CG; class Emitter; +class DepNode; + class Insn { public: @@ -549,6 +551,18 @@ class Insn { return clearStackOffset[0] != -1 || clearStackOffset[1] != -1; } + void SetDepNode(DepNode &depNode) { + this->depNode = &depNode; + } + + DepNode *GetDepNode() { + return depNode; + } + + const DepNode *GetDepNode() const { + return depNode; + } + void InitWithOriginalInsn(const Insn &originalInsn, MemPool &memPool) { prev = originalInsn.prev; next = originalInsn.next; @@ -585,6 +599,7 @@ class Insn { uint32 retSize = 0; /* Byte size of the return value if insn is a call. 
*/ /* record the stack cleared by MCC_ClearLocalStackRef or MCC_DecRefResetPair */ int64 clearStackOffset[kMaxStackOffsetSize] = { -1, -1 }; + DepNode *depNode = nullptr; /* For dependence analysis, pointing to a dependence node. */ MapleString comment; bool isFrameDef = false; }; diff --git a/src/maple_be/include/cg/optimize_common.h b/src/maple_be/include/cg/optimize_common.h new file mode 100644 index 0000000000000000000000000000000000000000..5386f13f8f9f791fe6dc6328cb858c54cb200f74 --- /dev/null +++ b/src/maple_be/include/cg/optimize_common.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_OPTIMIZE_COMMON_H +#define MAPLEBE_INCLUDE_CG_OPTIMIZE_COMMON_H +#include "cgfunc.h" + +namespace maplebe { +const std::string kCfgoChaining = "red"; +const std::string kCfgoSj = "burlywood1"; +const std::string kCfgoFlipCond = "cadetblue1"; +const std::string kCfgoAlways = "green"; +const std::string kCfgoUnreach = "yellow"; +const std::string kCfgoDup = "orange"; +const std::string kCfgoEmpty = "purple"; +const std::string kIcoIte = "blue"; /* if conversion optimization, if-then-else */ +const std::string kIcoIt = "grey"; /* if conversion optimization, if-then-else */ + +class OptimizationPattern { + public: + explicit OptimizationPattern(CGFunc &func) + : patternName(func.GetMemoryPool()), + cgFunc(&func), + dotColor(func.GetMemoryPool()) {} + virtual ~OptimizationPattern() = default; + + bool IsKeepPosition() const { + return keepPosition; + } + + void SetKeepPosition(bool flag) { + keepPosition = flag; + } + + bool IsLabelInLSDAOrSwitchTable(LabelIdx label) const { + return cgFunc->GetTheCFG()->InLSDA(label, *cgFunc->GetEHFunc()) || + cgFunc->GetTheCFG()->InSwitchTable(label, *cgFunc); + } + + void Search2Op(bool checkOnly); + virtual bool Optimize(BB &curBB) = 0; + + protected: + void Log(uint32 bbID); + + bool keepPosition = false; + MapleString patternName; + CGFunc *cgFunc; + MapleString dotColor; + bool checkOnly = false; +}; + +class Optimizer { + public: + Optimizer(CGFunc &func, MemPool &memPool) + : cgFunc(&func), + name(nullptr), + memPool(&memPool), + alloc(&memPool), + diffPassPatterns(alloc.Adapter()), + singlePassPatterns(alloc.Adapter()) { + func.GetTheCFG()->InitInsnVisitor(func); + } + + virtual ~Optimizer() = default; + void Run(const std::string &funcName, bool checkOnly = false); + virtual void InitOptimizePatterns() = 0; + + protected: + CGFunc *cgFunc; + const char *name; + MemPool *memPool; + MapleAllocator alloc; + /* patterns need to run in different passes of cgFunc */ + MapleVector diffPassPatterns; + /* patterns can run in a single pass of cgFunc */ + MapleVector singlePassPatterns; +}; + +class OptimizeLogger { + public: + static OptimizeLogger &GetLogger() { + static OptimizeLogger instance; + return instance; + } + + void Log(const std::string &patternName); + void ClearLocal(); + void Print(const std::string &funcName); + + private: + OptimizeLogger() = default; + + ~OptimizeLogger() = 
default; + + OptimizeLogger(const OptimizeLogger&); + OptimizeLogger &operator=(const OptimizeLogger&); + + std::map globalStat; + std::map localStat; +}; + +class DotGenerator { + public: + static void SetColor(uint32 bbID, const std::string &color); + static void GenerateDot(const std::string &preFix, CGFunc &cgFunc, const MIRModule &mod, + bool includeEH = false, regno_t vReg = 0); + private: + static std::map coloringMap; + static std::string GetFileName(const MIRModule &mirModule, const std::string &filePreFix); + static void DumpEdge(const CGFunc &cgFunction, std::ofstream &cfgFileOfStream, bool isIncludeEH); + static void DumpBBInstructions(const CGFunc &cgFunction, regno_t vReg, std::ofstream &cfgFile); + static bool FoundListOpndRegNum(ListOperand &listOpnd, const Insn &insnObj, regno_t vReg); + static bool FoundMemAccessOpndRegNum(const MemOperand &memOperand, const Insn &insnObj, regno_t vReg); + static bool FoundNormalOpndRegNum(RegOperand ®Opnd, const Insn &insnObj, regno_t vReg); +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_OPTIMIZE_COMMON_H */ \ No newline at end of file diff --git a/src/maple_be/include/cg/peep.h b/src/maple_be/include/cg/peep.h new file mode 100644 index 0000000000000000000000000000000000000000..9fe8899f014acc90985e11dfd9b69b5b87844b19 --- /dev/null +++ b/src/maple_be/include/cg/peep.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
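optimize_common.h splits the work between OptimizationPattern subclasses, which each try to rewrite one block, and an Optimizer that runs them and reports statistics through OptimizeLogger. A stripped-down sketch of that interplay, with stand-in Block and Pattern types:

// Illustrative sketch: patterns applied per block, with per-pattern hit counting.
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct Block { int id; bool empty; };

class Pattern {
 public:
  explicit Pattern(std::string patternName) : name(std::move(patternName)) {}
  virtual ~Pattern() = default;
  virtual bool Optimize(Block &bb) = 0;
  const std::string name;
};

class EmptyBlockPattern : public Pattern {
 public:
  EmptyBlockPattern() : Pattern("Empty BB") {}
  bool Optimize(Block &bb) override { return bb.empty; }  // reports empty blocks as "optimized"
};

int main() {
  std::vector<Block> cfg = {{0, false}, {1, true}, {2, false}};
  std::vector<std::unique_ptr<Pattern>> patterns;
  patterns.push_back(std::unique_ptr<Pattern>(new EmptyBlockPattern()));

  std::map<std::string, int> hits;  // per-pattern statistics, as OptimizeLogger keeps
  for (auto &bb : cfg) {
    for (auto &p : patterns) {
      if (p->Optimize(bb)) {
        ++hits[p->name];
      }
    }
  }
  for (const auto &kv : hits) {
    std::cout << kv.first << ": " << kv.second << '\n';
  }
  return 0;
}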
+ */ +#ifndef MAPLEBE_INCLUDE_CG_PEEP_H +#define MAPLEBE_INCLUDE_CG_PEEP_H + +#include "cg.h" +#include "optimize_common.h" + +namespace maplebe { +enum ReturnType : uint8 { + kResUseFirst, + kResDefFirst, + kResNotFind +}; + +class PeepPattern { + public: + explicit PeepPattern(CGFunc &oneCGFunc) : cgFunc(oneCGFunc) {} + virtual ~PeepPattern() = default; + virtual void Run(BB &bb, Insn &insn) = 0; + /* optimization support function */ + bool IfOperandIsLiveAfterInsn(const RegOperand ®Opnd, Insn &insn); + bool FindRegLiveOut(const RegOperand ®Opnd, const BB &bb); + bool CheckOpndLiveinSuccs(const RegOperand ®Opnd, const BB &bb) const; + ReturnType IsOpndLiveinBB(const RegOperand ®Opnd, const BB &bb) const; + int logValueAtBase2(int64 val) const; + bool IsMemOperandOptPattern(const Insn &insn, Insn &nextInsn); + + protected: + CGFunc &cgFunc; +}; + +class PeepHoleOptimizer { + public: + explicit PeepHoleOptimizer(CGFunc *cf) : cgFunc(cf) { + cg = cgFunc->GetCG(); + } + ~PeepHoleOptimizer() = default; + void Peephole0(); + void PeepholeOpt(); + void PrePeepholeOpt(); + void PrePeepholeOpt1(); + + private: + CGFunc *cgFunc; + CG *cg; +}; /* class PeepHoleOptimizer */ + +class PeepPatternMatch { + public: + PeepPatternMatch(CGFunc &oneCGFunc, MemPool *memPool) + : optOwnMemPool(memPool), + peepAllocator(memPool), + optimizations(peepAllocator.Adapter()), + cgFunc(oneCGFunc) {} + virtual ~PeepPatternMatch() = default; + virtual void Run(BB &bb, Insn &insn) = 0; + virtual void InitOpts() = 0; + protected: + MemPool *optOwnMemPool; + MapleAllocator peepAllocator; + MapleVector optimizations; + CGFunc &cgFunc; +}; + +class PeepOptimizer { + public: + PeepOptimizer(CGFunc &oneCGFunc, MemPool *memPool) + : cgFunc(oneCGFunc), + peepOptMemPool(memPool) { + index = 0; + } + ~PeepOptimizer() = default; + template + void Run(); + static int32 index; + + private: + CGFunc &cgFunc; + MemPool *peepOptMemPool; +}; + +CGFUNCPHASE_CANSKIP(CgDoPrePeepHole, "prepeephole") +CGFUNCPHASE_CANSKIP(CgDoPrePeepHole1, "prepeephole1") +CGFUNCPHASE_CANSKIP(CgDoPeepHole0, "peephole0") +CGFUNCPHASE_CANSKIP(CgDoPeepHole, "peephole") +} /* namespace maplebe */ +#endif /* MAPLEBE_INCLUDE_CG_PEEP_H */ diff --git a/src/maple_be/include/cg/pressure.h b/src/maple_be/include/cg/pressure.h new file mode 100644 index 0000000000000000000000000000000000000000..1544ff328d9c08109f3cac5b56971c19d749305a --- /dev/null +++ b/src/maple_be/include/cg/pressure.h @@ -0,0 +1,143 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
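A PeepPattern walks each block instruction by instruction and rewrites small local sequences. The sketch below applies two classic rewrites (dropping a self move, turning an add of zero into a move) over a toy instruction list; the rules and the Inst struct are illustrative, not the registered AArch64 patterns.

// Illustrative sketch of a peephole pass over a toy instruction list.
#include <iostream>
#include <string>
#include <vector>

struct Inst {
  std::string op;   // "mov" or "add"
  std::string dst;
  std::string src;
  long imm;         // used by "add dst, src, #imm"
};

int main() {
  std::vector<Inst> bb = {
    {"mov", "x1", "x1", 0},   // redundant, removed
    {"add", "x2", "x3", 0},   // add with #0, becomes mov x2, x3
    {"add", "x4", "x4", 8},   // kept
  };
  std::vector<Inst> out;
  for (const Inst &insn : bb) {
    if (insn.op == "mov" && insn.dst == insn.src) {
      std::cout << "drop mov " << insn.dst << ", " << insn.src << '\n';
      continue;
    }
    if (insn.op == "add" && insn.imm == 0) {
      std::cout << "rewrite add " << insn.dst << ", " << insn.src << ", #0 to mov\n";
      out.push_back({"mov", insn.dst, insn.src, 0});
      continue;
    }
    out.push_back(insn);
  }
  std::cout << out.size() << " instructions remain\n";
  return 0;
}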
+ */ +#ifndef MAPLEBE_INCLUDE_CG_PRESSURE_H +#define MAPLEBE_INCLUDE_CG_PRESSURE_H + +#include "cgbb.h" +#include "cgfunc.h" + +namespace maplebe { +struct RegList { + Insn *insn; + RegList *next; +}; + +#define FOR_ALL_REGCLASS(i) \ + for (int32 i = 0; i < RegPressure::GetMaxRegClassNum(); ++i) + +class RegPressure { + public: + explicit RegPressure(MapleAllocator &alloc) + : uses(alloc.Adapter()), defs(alloc.Adapter()), + regUses(alloc.Adapter()) {} + + virtual ~RegPressure() = default; + + void DumpRegPressure() const; + + void AddUseReg(regno_t regNO) { + uses.insert(regNO); + } + + void AddDefReg(regno_t regNO) { + defs.insert(regNO); + } + + void SetRegUses(regno_t regNO, RegList *regList) { + regUses.insert(std::pair(regNO, regList)); + } + + static void SetMaxRegClassNum(int32 maxClassNum) { + maxRegClassNum = maxClassNum; + } + + static int32 GetMaxRegClassNum() { + return maxRegClassNum; + } + + int32 GetPriority() const { + return priority; + } + + void SetPriority(int32 value) { + priority = value; + } + + int32 GetMaxDepth() const { + return maxDepth; + } + + void SetMaxDepth(int32 value) { + maxDepth = value; + } + + int32 GetNear() const { + return near; + } + + void SetNear(int32 value) { + near = value; + } + + int32 GetIncPressure() const { + return incPressure; + } + + void SetIncPressure(bool value) { + incPressure = value; + } + + const int32 *GetPressure() const { + return pressure; + } + + void SetPressure(int32 *pressure) { + this->pressure = pressure; + } + + void IncPressureByIndex(int32 index) { + ++pressure[index]; + } + + void DecPressureByIndex(int32 index) { + --pressure[index]; + } + + void InitPressure() { + FOR_ALL_REGCLASS(i) { + pressure[i] = 0; + incPressure = false; + } + } + + const MapleSet &GetUses() const { + return uses; + } + + const MapleSet &GetDefs() const { + return defs; + } + + const MapleMap &GetRegUses() const { + return regUses; + } + + private: + MapleSet uses; + MapleSet defs; + /* save reglist of every uses'register */ + MapleMap regUses; + int32 *pressure = nullptr; + /* max number of reg's class */ + static int32 maxRegClassNum; + int32 priority = 0; + int32 maxDepth = 0; + int32 near = 0; + /* if a type register increase then set incPressure as true. */ + bool incPressure = false; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_PRESSURE_H */ \ No newline at end of file diff --git a/src/maple_be/include/cg/reaching.h b/src/maple_be/include/cg/reaching.h new file mode 100644 index 0000000000000000000000000000000000000000..546e52deda07722eb2380ef406d45a22ad0fcfd2 --- /dev/null +++ b/src/maple_be/include/cg/reaching.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
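RegPressure records, per register class, how many values are simultaneously live around an instruction. The sketch below computes that quantity for a single toy block with one counter by walking it backwards: uses make a register live, defs kill it, and the running maximum is the pressure. The real code keeps one counter per register class.

// Illustrative sketch: register pressure of one block via a backward liveness walk.
#include <algorithm>
#include <iostream>
#include <set>
#include <vector>

struct Inst {
  std::set<int> defs;
  std::set<int> uses;
};

int main() {
  // r3 = r1 + r2;  r4 = r3 + r1;  r5 = r4
  std::vector<Inst> bb = {
    {{3}, {1, 2}},
    {{4}, {3, 1}},
    {{5}, {4}},
  };
  std::set<int> live;       // assume nothing is live out of the block
  size_t maxPressure = 0;
  for (auto it = bb.rbegin(); it != bb.rend(); ++it) {
    for (int d : it->defs) {
      live.erase(d);        // a definition ends the live range (backwards)
    }
    for (int u : it->uses) {
      live.insert(u);       // a use starts (extends) the live range
    }
    maxPressure = std::max(maxPressure, live.size());
  }
  std::cout << "max pressure in block: " << maxPressure << '\n';  // prints 2
  return 0;
}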
+ */ +#ifndef MAPLEBE_INCLUDE_CG_REACHING_H +#define MAPLEBE_INCLUDE_CG_REACHING_H + +#include "cg_phase.h" +#include "cgbb.h" +#include "datainfo.h" + +namespace maplebe { +enum VisitStatus : uint8 { + kNotVisited, + kNormalVisited, + kEHVisited +}; + +enum AnalysisType : uint8 { + kRDRegAnalysis = 1, + kRDMemAnalysis = 2, + kRDAllAnalysis = 3 +}; + +enum DumpType : uint32 { + kDumpAll = 0xFFF, + kDumpRegGen = 0x001, + kDumpRegUse = 0x002, + kDumpRegIn = 0x004, + kDumpRegOut = 0x008, + kDumpMemGen = 0x010, + kDumpMemIn = 0x020, + kDumpMemOut = 0x040, + kDumpMemUse = 0x080, + kDumpBBCGIR = 0x100 +}; + +class ReachingDefinition : public AnalysisResult { + public: + ReachingDefinition(CGFunc &func, MemPool &memPool); + ~ReachingDefinition() override = default; + void AnalysisStart(); + void Dump(uint32) const; + void DumpInfo(const BB&, DumpType) const; + void DumpBBCGIR(const BB&) const; + void ClearDefUseInfo(); + void UpdateInOut(BB &changedBB); + void UpdateInOut(BB &changedBB, bool isReg); + void SetAnalysisMode(AnalysisType analysisMode) { + mode = analysisMode; + } + + bool OnlyAnalysisReg() const { + return mode == kRDRegAnalysis; + } + + uint32 GetMaxInsnNO() const { + return maxInsnNO; + } + + size_t GetRegSize(const BB &bb) const { + return regUse[bb.GetId()]->Size(); + } + + bool CheckRegGen(const BB &bb, uint32 regNO) const { + return regGen[bb.GetId()]->TestBit(regNO); + } + + void EnlargeRegCapacity(uint32 size); + bool IsFrameReg(const Operand &opnd) const; + InsnSet FindUseForRegOpnd(Insn &insn, uint32 indexOrRegNO, bool isRegNO) const; + bool RegIsLiveBetweenInsn(uint32 regNO, Insn &startInsn, Insn &endInsn) const; + bool IsLiveInAllPathBB(uint32 regNO, const BB &startBB, const BB &endBB, std::vector &visitedBB) const; + bool HasCallBetweenDefUse(const Insn &defInsn, const Insn &useInsn) const; + + virtual void InitGenUse(BB &bb, bool firstTime = true) = 0; + virtual InsnSet FindDefForMemOpnd(Insn &insn, uint32 indexOrOffset, bool isOffset = false) const = 0; + virtual InsnSet FindUseForMemOpnd(Insn &insn, uint8 index, bool secondMem = false) const = 0; + virtual std::vector FindMemDefBetweenInsn(uint32 offset, const Insn *startInsn, Insn *endInsn) const = 0; + virtual std::vector FindRegDefBetweenInsn(uint32 regNO, Insn *startInsn, Insn *endInsn) const = 0; + virtual bool FindRegUseBetweenInsn(uint32 regNO, Insn *startInsn, Insn *endInsn, InsnSet &useInsnSet) const = 0; + virtual bool FindMemUseBetweenInsn(uint32 offset, Insn *startInsn, const Insn *endInsn, + InsnSet &useInsnSet) const = 0; + virtual InsnSet FindDefForRegOpnd(Insn &insn, uint32 indexOrRegNO, bool isRegNO = false) const = 0; + static constexpr int32 kWordByteNum = 4; + static constexpr int32 kDoubleWordByteNum = 8; + /* to save storage space, the offset of stack memory is devided by 4 and then saved in DataInfo */ + static constexpr int32 kMemZoomSize = 4; + /* number the insn interval 3. 
make sure no repeated insn number when new insn inserted */ + static constexpr uint32 kInsnNoInterval = 3; + + protected: + virtual void InitStartGen() = 0; + virtual void InitEhDefine(BB &bb) = 0; + virtual void GenAllCallerSavedRegs(BB &bb) = 0; + virtual void AddRetPseudoInsn(BB &bb) = 0; + virtual void AddRetPseudoInsns() = 0; + virtual int32 GetStackSize() const = 0; + virtual bool IsCallerSavedReg(uint32 regNO) const = 0; + virtual void FindRegDefInBB(uint32 regNO, BB &bb, InsnSet &defInsnSet) const = 0; + virtual void FindMemDefInBB(uint32 offset, BB &bb, InsnSet &defInsnSet) const = 0; + virtual void DFSFindDefForRegOpnd(const BB &startBB, uint32 regNO, std::vector &visitedBB, + InsnSet &defInsnSet) const = 0; + virtual void DFSFindDefForMemOpnd(const BB &startBB, uint32 offset, std::vector &visitedBB, + InsnSet &defInsnSet) const = 0; + void DFSFindUseForMemOpnd(const BB &startBB, uint32 offset, std::vector &visitedBB, + InsnSet &useInsnSet, bool onlyFindForEhSucc) const; + CGFunc *cgFunc; + MapleAllocator rdAlloc; + MapleVector pseudoInsns; + AnalysisType mode = kRDRegAnalysis; + BB *firstCleanUpBB = nullptr; + std::vector regGen; + std::vector regUse; + std::vector regIn; + std::vector regOut; + std::vector memGen; + std::vector memUse; + std::vector memIn; + std::vector memOut; + const uint32 kMaxBBNum; + private: + void Initialize(); + void InitDataSize(); + void BuildInOutForFuncBody(); + void BuildInOutForCleanUpBB(); + void BuildInOutForCleanUpBB(bool isReg, const std::set &index); + void InitRegAndMemInfo(const BB &bb); + void InitOut(const BB &bb); + bool GenerateIn(const BB &bb); + bool GenerateIn(const BB &bb, const std::set &index, const bool isReg); + bool GenerateOut(const BB &bb); + bool GenerateOut(const BB &bb, const std::set &index, const bool isReg); + bool GenerateInForFirstCleanUpBB(); + bool GenerateInForFirstCleanUpBB(bool isReg, const std::set &index); + void DFSFindUseForRegOpnd(const BB &startBB, uint32 regNO, std::vector &visitedBB, + InsnSet &useInsnSet, bool onlyFindForEhSucc) const; + bool RegIsUsedInOtherBB(const BB &startBB, uint32 regNO, std::vector &visitedBB) const; + bool RegHasUsePoint(uint32 regNO, Insn &startInsn) const; + bool CanReachEndBBFromCurrentBB(const BB ¤tBB, const BB &endBB, std::vector &traversedBBSet) const; + bool HasCallBetweenInsnInSameBB(const Insn &startInsn, const Insn &endInsn) const; + bool HasCallInPath(const BB &startBB, const BB &endBB, std::vector &visitedBB) const; + bool RegIsUsedInCleanUpBB(uint32 regNO) const; + + MapleSet normalBBSet; + MapleSet cleanUpBBSet; + uint32 maxInsnNO = 0; +}; + +CGFUNCPHASE(CgDoReachingDefinition, "reachingdefinition") +CGFUNCPHASE(CgDoClearRDInfo, "clearrdinfo") +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_REACHING_H */ diff --git a/src/maple_be/include/cg/schedule.h b/src/maple_be/include/cg/schedule.h new file mode 100644 index 0000000000000000000000000000000000000000..5558cc9cc30bcd465e2ea4c15bd4dd0aca946e88 --- /dev/null +++ b/src/maple_be/include/cg/schedule.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. 
+ * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_SCHEDULE_H +#define MAPLEBE_INCLUDE_CG_SCHEDULE_H + +#include "insn.h" +#include "mad.h" +#include "dependence.h" +#include "live.h" + +namespace maplebe { +#define LIST_SCHED_DUMP CG_DEBUG_FUNC(cgFunc) +#define LIST_SCHED_DUMP_REF CG_DEBUG_FUNC(&cgFunc) + +class RegPressureSchedule { + public: + RegPressureSchedule (CGFunc &func, MapleAllocator &alloc) + : cgFunc(func), liveReg(alloc.Adapter()), scheduledNode(alloc.Adapter()), + readyList(alloc.Adapter()) {} + virtual ~RegPressureSchedule() = default; + + void InitBBInfo(BB &b, MemPool &memPool, const MapleVector &nodes); + void BuildPhyRegInfo(const std::vector ®NumVec); + void InitReadyList(const MapleVector &nodes); + void InitPressure(); + void UpdateBBPressure(const DepNode &node); + void CalculatePressure(DepNode &node, regno_t reg, bool def); + void SortReadyList(); + bool IsLastUse(const DepNode &node, regno_t reg) const; + void ReCalculateDepNodePressure(DepNode &node); + void UpdateLiveReg(const DepNode &node, regno_t reg, bool def); + bool CanSchedule(const DepNode &node) const; + void UpdateReadyList(const DepNode &node); + void UpdatePriority(DepNode &node); + void CalculateMaxDepth(const MapleVector &nodes); + void CalculateNear(const DepNode &node); + static bool DepNodePriorityCmp(const DepNode *node1, const DepNode *node2); + DepNode *ChooseNode(); + void DoScheduling(MapleVector &nodes); + + private: + CGFunc &cgFunc; + BB *bb = nullptr; + int32 *maxPressure = nullptr; + int32 *curPressure = nullptr; + MapleSet liveReg; + /* save node that has been scheduled. */ + MapleVector scheduledNode; + MapleVector readyList; + /* save the amount of every type register. 
*/ + int32 *physicalRegNum = nullptr; + int32 maxPriority = 0; +}; + +enum SimulateType : uint8 { + kListSchedule, + kBruteForce, + kSimulateOnly +}; + +class Schedule { + public: + Schedule(CGFunc &func, MemPool &memPool, LiveAnalysis &liveAnalysis, const std::string &phase) + : phaseName(phase), + cgFunc(func), + memPool(memPool), + alloc(&memPool), + live(liveAnalysis), + nodes(alloc.Adapter()), + readyList(alloc.Adapter()) {} + + virtual ~Schedule() = default; + virtual void MemoryAccessPairOpt() = 0; + virtual void ClinitPairOpt() = 0; + virtual void FindAndCombineMemoryAccessPair(const std::vector &readyList) = 0; + virtual void RegPressureScheduling(BB &bb, MapleVector &depNodes) = 0; + virtual uint32 DoSchedule() = 0; + virtual uint32 DoBruteForceSchedule() = 0; + virtual uint32 SimulateOnly() = 0; + virtual void UpdateBruteForceSchedCycle() = 0; + virtual void IterateBruteForce(DepNode &targetNode, MapleVector &readyList, uint32 currCycle, + MapleVector &scheduledNodes, uint32 &maxCycleCount, + MapleVector &optimizedScheduledNodes) = 0; + virtual void UpdateReadyList(DepNode &targetNode, MapleVector &readyList, bool updateEStart) = 0; + virtual void ListScheduling(bool beforeRA) = 0; + virtual void FinalizeScheduling(BB &bb, const DepAnalysis &depAnalysis) = 0; + + protected: + virtual void Init() = 0; + virtual uint32 ComputeEstart(uint32 cycle) = 0; + virtual void ComputeLstart(uint32 maxEstart) = 0; + virtual void UpdateELStartsOnCycle(uint32 cycle) = 0; + virtual void RandomTest() = 0; + virtual void EraseNodeFromReadyList(const DepNode &target) = 0; + virtual void EraseNodeFromNodeList(const DepNode &target, MapleVector &readyList) = 0; + virtual uint32 GetNextSepIndex() const = 0; + virtual void CountUnitKind(const DepNode &depNode, uint32 array[], const uint32 arraySize) const = 0; + virtual bool CanCombine(const Insn &insn) const = 0; + void InitIDAndLoc(); + std::string PhaseName() const { + return phaseName; + } + + const std::string phaseName; + CGFunc &cgFunc; + MemPool &memPool; + MapleAllocator alloc; + LiveAnalysis &live; + DepAnalysis *depAnalysis = nullptr; + MAD *mad = nullptr; + uint32 lastSeparatorIndex = 0; + uint32 nodeSize = 0; + MapleVector nodes; /* Dependence graph */ + MapleVector readyList; /* Ready list. */ +}; + +CGFUNCPHASE_CANSKIP(CgDoPreScheduling, "prescheduling") +CGFUNCPHASE_CANSKIP(CgDoScheduling, "scheduling") +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_SCHEDULE_H */ diff --git a/src/maple_be/include/cg/strldr.h b/src/maple_be/include/cg/strldr.h new file mode 100644 index 0000000000000000000000000000000000000000..16b015e46afb0ba0341bd55839b6e75fd36f848a --- /dev/null +++ b/src/maple_be/include/cg/strldr.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_STRLDR_H +#define MAPLEBE_INCLUDE_CG_STRLDR_H +#include "cg_phase.h" + +namespace maplebe { +class StoreLoadOpt { + public: + StoreLoadOpt(CGFunc &func, MemPool &memPool) : cgFunc(func), memPool(memPool) {} + virtual ~StoreLoadOpt() = default; + virtual void Run() = 0; + std::string PhaseName() const { + return "storeloadopt"; + } + + protected: + CGFunc &cgFunc; + MemPool &memPool; + /* if the number of bbs is more than 500 or the number of insns is more than 9000, don't optimize. */ + static constexpr uint32 kMaxBBNum = 500; + static constexpr uint32 kMaxInsnNum = 9000; +}; + +CGFUNCPHASE_CANSKIP(CgDoStoreLoadOpt, "storeloadopt") +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_STRLDR_H */ \ No newline at end of file diff --git a/src/maple_be/src/ad/mad.cpp b/src/maple_be/src/ad/mad.cpp index ade0934edd5bac2bdd925ac3d5bd6072ba2dfec5..e6e40b4d0d784fa9c33ec91c112e6dda7bbb3e6c 100644 --- a/src/maple_be/src/ad/mad.cpp +++ b/src/maple_be/src/ad/mad.cpp @@ -15,6 +15,7 @@ #include "mad.h" #include #include "aarch64_operand.h" +#include "schedule.h" #include "insn.h" namespace maplebe { @@ -175,6 +176,15 @@ Reservation *MAD::FindReservation(const Insn &insn) const { return nullptr; } +/* Get latency that is def insn to use insn */ +int MAD::GetLatency(const Insn &def, const Insn &use) const { + int latency = BypassLatency(def, use); + if (latency < 0) { + latency = DefaultLatency(def); + } + return latency; +} + /* Get bypass latency that is def insn to use insn */ int MAD::BypassLatency(const Insn &def, const Insn &use) const { int latency = -1; @@ -190,6 +200,12 @@ int MAD::BypassLatency(const Insn &def, const Insn &use) const { return latency; } +/* Get insn's default latency */ +int MAD::DefaultLatency(const Insn &insn) const { + Reservation *res = insn.GetDepNode()->GetReservation(); + return res != nullptr ? res->GetLatency() : 0; +} + void MAD::AdvanceCycle() { for (auto unit : allUnits) { unit->AdvanceCycle(); diff --git a/src/maple_be/src/cg/aarch64/aarch64_dependence.cpp b/src/maple_be/src/cg/aarch64/aarch64_dependence.cpp new file mode 100644 index 0000000000000000000000000000000000000000..894b7ee67c660735bafeb90cf95a9082a82971b9 --- /dev/null +++ b/src/maple_be/src/cg/aarch64/aarch64_dependence.cpp @@ -0,0 +1,1007 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#include "aarch64_dependence.h" +#include "aarch64_cg.h" +#include "aarch64_operand.h" +#include "pressure.h" + +/* For building dependence graph, The entry is AArch64DepAnalysis::Run. 
*/ +namespace maplebe { +/* constructor */ +AArch64DepAnalysis::AArch64DepAnalysis(CGFunc &func, MemPool &mp, MAD &mad, bool beforeRA) + : DepAnalysis(func, mp, mad, beforeRA), useRegnos(alloc.Adapter()), + defRegnos(alloc.Adapter()), stackUses(alloc.Adapter()), + stackDefs(alloc.Adapter()), heapUses(alloc.Adapter()), + heapDefs(alloc.Adapter()), mayThrows(alloc.Adapter()), + ambiInsns(alloc.Adapter()), ehInRegs(alloc.Adapter()) { + uint32 maxRegNum; + if (beforeRA) { + maxRegNum = cgFunc.GetMaxVReg(); + } else { + maxRegNum = kAllRegNum; + } + regDefs = memPool.NewArray(maxRegNum); + regUses = memPool.NewArray(maxRegNum); +} + +/* print dep node information */ +void AArch64DepAnalysis::DumpDepNode(DepNode &node) const { + node.GetInsn()->Dump(); + uint32 num = node.GetUnitNum(); + LogInfo::MapleLogger() << "unit num : " << num << ", "; + for (uint32 i = 0; i < num; ++i) { + const Unit *unit = node.GetUnitByIndex(i); + if (unit != nullptr) { + PRINT_VAL(unit->GetName()); + } else { + PRINT_VAL("none"); + } + } + LogInfo::MapleLogger() << '\n'; + node.DumpSchedInfo(); + if (beforeRA) { + node.DumpRegPressure(); + } +} + +/* print dep link information */ +void AArch64DepAnalysis::DumpDepLink(DepLink &link, const DepNode *node) const { + PRINT_VAL(GetDepTypeName(link.GetDepType())); + PRINT_STR_VAL("Latency: ", link.GetLatency()); + if (node != nullptr) { + node->GetInsn()->Dump(); + return; + } + LogInfo::MapleLogger() << "from : "; + link.GetFrom().GetInsn()->Dump(); + LogInfo::MapleLogger() << "to : "; + link.GetTo().GetInsn()->Dump(); +} + +/* Append use register to the list. */ +void AArch64DepAnalysis::AppendRegUseList(Insn &insn, regno_t regNO) { + RegList *regList = memPool.New(); + regList->insn = &insn; + regList->next = nullptr; + if (regUses[regNO] == nullptr) { + regUses[regNO] = regList; + return; + } + RegList *lastRegList = regUses[regNO]; + while (lastRegList->next != nullptr) { + lastRegList = lastRegList->next; + } + lastRegList->next = regList; +} + +/* + * Add dependence edge. + * Two dependence node has a unique edge. + * True dependence overwirtes other dependences. + */ +void AArch64DepAnalysis::AddDependence(DepNode &fromNode, DepNode &toNode, DepType depType) { + /* Can not build a self loop dependence. */ + if (&fromNode == &toNode) { + return; + } + /* Check if exist edge. */ + if (!fromNode.GetSuccs().empty()) { + DepLink *depLink = fromNode.GetSuccs().back(); + if (&(depLink->GetTo()) == &toNode) { + if (depLink->GetDepType() != kDependenceTypeTrue) { + if (depType == kDependenceTypeTrue) { + /* Has exist edge, replace it. */ + depLink->SetDepType(kDependenceTypeTrue); + depLink->SetLatency(mad.GetLatency(*fromNode.GetInsn(), *toNode.GetInsn())); + } + } + return; + } + } + DepLink *depLink = memPool.New(fromNode, toNode, depType); + if (depType == kDependenceTypeTrue) { + depLink->SetLatency(mad.GetLatency(*fromNode.GetInsn(), *toNode.GetInsn())); + } + fromNode.AddSucc(*depLink); + toNode.AddPred(*depLink); +} + +void AArch64DepAnalysis::AddDependence4InsnInVectorByType(MapleVector &insns, Insn &insn, const DepType &type) { + for (auto anyInsn : insns) { + AddDependence(*anyInsn->GetDepNode(), *insn.GetDepNode(), type); + } +} + +void AArch64DepAnalysis::AddDependence4InsnInVectorByTypeAndCmp(MapleVector &insns, Insn &insn, + const DepType &type) { + for (auto anyInsn : insns) { + if (anyInsn != &insn) { + AddDependence(*anyInsn->GetDepNode(), *insn.GetDepNode(), type); + } + } +} + +/* Remove self dependence (self loop) in dependence graph. 
*/ +void AArch64DepAnalysis::RemoveSelfDeps(Insn &insn) { + DepNode *node = insn.GetDepNode(); + ASSERT(node->GetSuccs().back()->GetTo().GetInsn() == &insn, "Is not a self dependence."); + ASSERT(node->GetPreds().back()->GetFrom().GetInsn() == &insn, "Is not a self dependence."); + node->RemoveSucc(); + node->RemovePred(); +} + +/* Build dependences of source register operand. */ +void AArch64DepAnalysis::BuildDepsUseReg(Insn &insn, regno_t regNO) { + useRegnos.push_back(regNO); + if (regDefs[regNO] != nullptr) { + /* Build true dependences. */ + AddDependence(*regDefs[regNO]->GetDepNode(), *insn.GetDepNode(), kDependenceTypeTrue); + } +} + +/* Build dependences of destination register operand. */ +void AArch64DepAnalysis::BuildDepsDefReg(Insn &insn, regno_t regNO) { + defRegnos.push_back(regNO); + /* Build anti dependences. */ + RegList *regList = regUses[regNO]; + while (regList != nullptr) { + CHECK_NULL_FATAL(regList->insn); + AddDependence(*regList->insn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeAnti); + regList = regList->next; + } + /* Build output depnedence. */ + if (regDefs[regNO] != nullptr) { + AddDependence(*regDefs[regNO]->GetDepNode(), *insn.GetDepNode(), kDependenceTypeOutput); + } +} + +void AArch64DepAnalysis::ReplaceDepNodeWithNewInsn(DepNode &firstNode, DepNode &secondNode, Insn& newInsn, + bool isFromClinit) const { + if (isFromClinit) { + firstNode.AddClinitInsn(*firstNode.GetInsn()); + firstNode.AddClinitInsn(*secondNode.GetInsn()); + firstNode.SetCfiInsns(secondNode.GetCfiInsns()); + } else { + for (Insn *insn : secondNode.GetCfiInsns()) { + firstNode.AddCfiInsn(*insn); + } + for (Insn *insn : secondNode.GetComments()) { + firstNode.AddComments(*insn); + } + secondNode.ClearComments(); + } + firstNode.SetInsn(newInsn); + Reservation *rev = mad.FindReservation(newInsn); + CHECK_FATAL(rev != nullptr, "reservation is nullptr."); + firstNode.SetReservation(*rev); + firstNode.SetUnits(rev->GetUnit()); + firstNode.SetUnitNum(rev->GetUnitNum()); + newInsn.SetDepNode(firstNode); +} + +void AArch64DepAnalysis::ClearDepNodeInfo(DepNode &depNode) const { + Insn &insn = cgFunc.GetCG()->BuildInstruction(MOP_pseudo_none); + insn.SetDepNode(depNode); + Reservation *seRev = mad.FindReservation(insn); + depNode.SetInsn(insn); + depNode.SetType(kNodeTypeEmpty); + depNode.SetReservation(*seRev); + depNode.SetUnitNum(0); + depNode.ClearCfiInsns(); + depNode.SetUnits(nullptr); +} + +/* Combine adrpldr&clinit_tail to clinit. */ +void AArch64DepAnalysis::CombineClinit(DepNode &firstNode, DepNode &secondNode, bool isAcrossSeparator) { + ASSERT(firstNode.GetInsn()->GetMachineOpcode() == MOP_adrp_ldr, "first insn should be adrpldr"); + ASSERT(secondNode.GetInsn()->GetMachineOpcode() == MOP_clinit_tail, "second insn should be clinit_tail"); + ASSERT(firstNode.GetCfiInsns().empty(), "There should not be any comment/cfi instructions between clinit."); + ASSERT(secondNode.GetComments().empty(), "There should not be any comment/cfi instructions between clinit."); + Insn &newInsn = cgFunc.GetCG()->BuildInstruction( + MOP_clinit, firstNode.GetInsn()->GetOperand(0), firstNode.GetInsn()->GetOperand(1)); + newInsn.SetId(firstNode.GetInsn()->GetId()); + /* Replace first node with new insn. */ + ReplaceDepNodeWithNewInsn(firstNode, secondNode, newInsn, true); + /* Clear second node information. */ + ClearDepNodeInfo(secondNode); + CombineDependence(firstNode, secondNode, isAcrossSeparator); +} + +/* + * Combine memory access pair: + * 1.ldr to ldp. + * 2.str to stp. 
+ */ +void AArch64DepAnalysis::CombineMemoryAccessPair(DepNode &firstNode, DepNode &secondNode, bool useFirstOffset) { + ASSERT(firstNode.GetInsn(), "the insn of first Node should not be nullptr"); + ASSERT(secondNode.GetInsn(), "the insn of second Node should not be nullptr"); + MOperator thisMop = firstNode.GetInsn()->GetMachineOpcode(); + MOperator mopPair = GetMopPair(thisMop); + ASSERT(mopPair != 0, "mopPair should not be zero"); + Operand *opnd0 = nullptr; + Operand *opnd1 = nullptr; + Operand *opnd2 = nullptr; + if (useFirstOffset) { + opnd0 = &(firstNode.GetInsn()->GetOperand(0)); + opnd1 = &(secondNode.GetInsn()->GetOperand(0)); + opnd2 = &(firstNode.GetInsn()->GetOperand(1)); + } else { + opnd0 = &(secondNode.GetInsn()->GetOperand(0)); + opnd1 = &(firstNode.GetInsn()->GetOperand(0)); + opnd2 = &(secondNode.GetInsn()->GetOperand(1)); + } + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(mopPair, *opnd0, *opnd1, *opnd2); + newInsn.SetId(firstNode.GetInsn()->GetId()); + std::string newComment; + const MapleString &comment = firstNode.GetInsn()->GetComment(); + if (comment.c_str() != nullptr) { + newComment += comment.c_str(); + } + const MapleString &secondComment = secondNode.GetInsn()->GetComment(); + if (secondComment.c_str() != nullptr) { + newComment += " "; + newComment += secondComment.c_str(); + } + if ((newComment.c_str() != nullptr) && (strlen(newComment.c_str()) > 0)) { + newInsn.SetComment(newComment); + } + /* Replace first node with new insn. */ + ReplaceDepNodeWithNewInsn(firstNode, secondNode, newInsn, false); + /* Clear second node information. */ + ClearDepNodeInfo(secondNode); + CombineDependence(firstNode, secondNode, false, true); +} + +/* Combine two dependence nodes to one */ +void AArch64DepAnalysis::CombineDependence(DepNode &firstNode, DepNode &secondNode, bool isAcrossSeparator, + bool isMemCombine) { + if (isAcrossSeparator) { + /* Clear all latency of the second node. */ + for (auto predLink : secondNode.GetPreds()) { + predLink->SetLatency(0); + } + for (auto succLink : secondNode.GetSuccs()) { + succLink->SetLatency(0); + } + return; + } + std::set uniqueNodes; + + for (auto predLink : firstNode.GetPreds()) { + if (predLink->GetDepType() == kDependenceTypeTrue) { + predLink->SetLatency(mad.GetLatency(*predLink->GetFrom().GetInsn(), *firstNode.GetInsn())); + } + uniqueNodes.insert(&predLink->GetFrom()); + } + for (auto predLink : secondNode.GetPreds()) { + if (&predLink->GetFrom() != &firstNode) { + if (uniqueNodes.insert(&(predLink->GetFrom())).second) { + AddDependence(predLink->GetFrom(), firstNode, predLink->GetDepType()); + } + } + predLink->SetLatency(0); + } + uniqueNodes.clear(); + for (auto succLink : firstNode.GetSuccs()) { + if (succLink->GetDepType() == kDependenceTypeTrue) { + succLink->SetLatency(mad.GetLatency(*succLink->GetFrom().GetInsn(), *firstNode.GetInsn())); + } + uniqueNodes.insert(&(succLink->GetTo())); + } + for (auto succLink : secondNode.GetSuccs()) { + if (uniqueNodes.insert(&(succLink->GetTo())).second) { + AddDependence(firstNode, succLink->GetTo(), succLink->GetDepType()); + if (isMemCombine) { + succLink->GetTo().IncreaseValidPredsSize(); + } + } + succLink->SetLatency(0); + } +} + +/* + * Build dependences of ambiguous instruction. + * ambiguous instruction : instructions that can not across may throw instructions. 
+ */
+void AArch64DepAnalysis::BuildDepsAmbiInsn(Insn &insn) {
+  AddDependence4InsnInVectorByType(mayThrows, insn, kDependenceTypeThrow);
+  ambiInsns.push_back(&insn);
+}
+
+/* Build dependences of may throw instructions. */
+void AArch64DepAnalysis::BuildDepsMayThrowInsn(Insn &insn) {
+  AddDependence4InsnInVectorByType(ambiInsns, insn, kDependenceTypeThrow);
+}
+
+bool AArch64DepAnalysis::IsFrameReg(const RegOperand &opnd) const {
+  return (opnd.GetRegisterNumber() == RFP) || (opnd.GetRegisterNumber() == RSP);
+}
+
+AArch64MemOperand *AArch64DepAnalysis::BuildNextMemOperandByByteSize(AArch64MemOperand &aarchMemOpnd,
+                                                                     uint32 byteSize) const {
+  AArch64MemOperand *nextMemOpnd = nullptr;
+  Operand *nextOpnd = aarchMemOpnd.Clone(memPool);
+  nextMemOpnd = static_cast<AArch64MemOperand*>(nextOpnd);
+  Operand *nextOfstOpnd = nextMemOpnd->GetOffsetImmediate()->Clone(memPool);
+  AArch64OfstOperand *aarchNextOfstOpnd = static_cast<AArch64OfstOperand*>(nextOfstOpnd);
+  CHECK_NULL_FATAL(aarchNextOfstOpnd);
+  int32 offsetVal = aarchNextOfstOpnd->GetOffsetValue();
+  aarchNextOfstOpnd->SetOffsetValue(offsetVal + byteSize);
+  nextMemOpnd->SetOffsetImmediate(*aarchNextOfstOpnd);
+  return nextMemOpnd;
+}
+
+/* Get the second memory access operand of stp/ldp instructions. */
+AArch64MemOperand *AArch64DepAnalysis::GetNextMemOperand(Insn &insn, AArch64MemOperand &aarchMemOpnd) const {
+  AArch64MemOperand *nextMemOpnd = nullptr;
+  switch (insn.GetMachineOpcode()) {
+    case MOP_wldp:
+    case MOP_sldp:
+    case MOP_xldpsw:
+    case MOP_wstp:
+    case MOP_sstp: {
+      nextMemOpnd = BuildNextMemOperandByByteSize(aarchMemOpnd, k4ByteSize);
+      break;
+    }
+    case MOP_xldp:
+    case MOP_dldp:
+    case MOP_xstp:
+    case MOP_dstp: {
+      nextMemOpnd = BuildNextMemOperandByByteSize(aarchMemOpnd, k8ByteSize);
+      break;
+    }
+    default:
+      break;
+  }
+
+  return nextMemOpnd;
+}
+
+/*
+ * Build dependences of symbol memory access.
+ * Memory access with a symbol must be a heap memory access.
+ */
+void AArch64DepAnalysis::BuildDepsAccessStImmMem(Insn &insn, bool isDest) {
+  if (isDest) {
+    /*
+     * Heap memory
+     * Build anti dependences.
+     */
+    AddDependence4InsnInVectorByType(heapUses, insn, kDependenceTypeAnti);
+    /* Build output dependence. */
+    AddDependence4InsnInVectorByType(heapDefs, insn, kDependenceTypeOutput);
+    heapDefs.push_back(&insn);
+  } else {
+    /* Heap memory */
+    AddDependence4InsnInVectorByType(heapDefs, insn, kDependenceTypeTrue);
+    heapUses.push_back(&insn);
+  }
+  if (memBarInsn != nullptr) {
+    AddDependence(*memBarInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeMembar);
+  }
+}
+
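// A minimal standalone sketch (not part of the patch) of the offset rule used by
// BuildNextMemOperandByByteSize above: the second slot of an ldp/stp simply sits one
// access width past the first. The helper name and the plain int32_t offset model are
// assumptions made for illustration only.
#include <cstdint>

constexpr int32_t SecondSlotOffset(int32_t firstOffset, int32_t accessByteSize) {
  // e.g. ldp w0, w1, [sp, #16] touches offsets 16 and 20; x-register pairs step by 8.
  return firstOffset + accessByteSize;
}

static_assert(SecondSlotOffset(16, 4) == 20, "w-register pair: second slot at +4");
static_assert(SecondSlotOffset(32, 8) == 40, "x-register pair: second slot at +8");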
+/* Build dependences of stack memory and heap memory uses. */
+void AArch64DepAnalysis::BuildDepsUseMem(Insn &insn, MemOperand &memOpnd) {
+  RegOperand *baseRegister = memOpnd.GetBaseRegister();
+  AArch64MemOperand &aarchMemOpnd = static_cast<AArch64MemOperand&>(memOpnd);
+  AArch64MemOperand *nextMemOpnd = GetNextMemOperand(insn, aarchMemOpnd);
+  if (((baseRegister != nullptr) && IsFrameReg(*baseRegister)) || aarchMemOpnd.IsStackMem()) {
+    /* Stack memory address */
+    for (auto defInsn : stackDefs) {
+      if (defInsn->IsCall() || NeedBuildDepsMem(aarchMemOpnd, nextMemOpnd, *defInsn)) {
+        AddDependence(*defInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeTrue);
+        continue;
+      }
+    }
+    stackUses.push_back(&insn);
+  } else {
+    /* Heap memory */
+    AddDependence4InsnInVectorByType(heapDefs, insn, kDependenceTypeTrue);
+    heapUses.push_back(&insn);
+  }
+  if (memBarInsn != nullptr) {
+    AddDependence(*memBarInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeMembar);
+  }
+}
+
+/* Return true if memInsn's memOpnd may alias with memOpnd or nextMemOpnd */
+bool AArch64DepAnalysis::NeedBuildDepsMem(const AArch64MemOperand &memOpnd, const AArch64MemOperand *nextMemOpnd,
+                                          Insn &memInsn) const {
+  auto *memOpndOfmemInsn = static_cast<AArch64MemOperand*>(memInsn.GetMemOpnd());
+  if (!memOpnd.NoAlias(*memOpndOfmemInsn) || ((nextMemOpnd != nullptr) && !nextMemOpnd->NoAlias(*memOpndOfmemInsn))) {
+    return true;
+  }
+  AArch64MemOperand *nextMemOpndOfmemInsn = GetNextMemOperand(memInsn, *memOpndOfmemInsn);
+  if (nextMemOpndOfmemInsn != nullptr) {
+    if (!memOpnd.NoAlias(*nextMemOpndOfmemInsn) ||
+        ((nextMemOpnd != nullptr) && !nextMemOpnd->NoAlias(*nextMemOpndOfmemInsn))) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/*
+ * Build anti dependences between insn and other insns that use stack memory.
+ * insn : the instruction that defines stack memory.
+ * memOpnd : insn's memOpnd
+ * nextMemOpnd : the second memory location accessed by a memory pair instruction (like ldp/stp).
+ */
+void AArch64DepAnalysis::BuildAntiDepsDefStackMem(Insn &insn, const AArch64MemOperand &memOpnd,
+                                                  const AArch64MemOperand *nextMemOpnd) {
+  for (auto *useInsn : stackUses) {
+    if (NeedBuildDepsMem(memOpnd, nextMemOpnd, *useInsn)) {
+      AddDependence(*useInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeAnti);
+    }
+  }
+}
+
+/*
+ * Build output dependences between insn and other insns that define stack memory.
+ * insn : the instruction that defines stack memory.
+ * memOpnd : insn's memOpnd
+ * nextMemOpnd : the second memory location accessed by a memory pair instruction (like ldp/stp).
+ */
+void AArch64DepAnalysis::BuildOutputDepsDefStackMem(Insn &insn, const AArch64MemOperand &memOpnd,
+                                                    const AArch64MemOperand *nextMemOpnd) {
+  for (auto defInsn : stackDefs) {
+    if (defInsn->IsCall() || NeedBuildDepsMem(memOpnd, nextMemOpnd, *defInsn)) {
+      AddDependence(*defInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeOutput);
+    }
+  }
+}
+
+/* Build dependences of stack memory and heap memory definitions. */
+void AArch64DepAnalysis::BuildDepsDefMem(Insn &insn, MemOperand &memOpnd) {
+  RegOperand *baseRegister = memOpnd.GetBaseRegister();
+  AArch64MemOperand &aarchMemOpnd = static_cast<AArch64MemOperand&>(memOpnd);
+  AArch64MemOperand *nextMemOpnd = GetNextMemOperand(insn, aarchMemOpnd);
+
+  if (((baseRegister != nullptr) && IsFrameReg(*baseRegister)) || aarchMemOpnd.IsStackMem()) {
+    /* Build anti dependences. */
+    BuildAntiDepsDefStackMem(insn, aarchMemOpnd, nextMemOpnd);
+    /* Build output dependence. */
+    BuildOutputDepsDefStackMem(insn, aarchMemOpnd, nextMemOpnd);
+    if (lastCallInsn != nullptr) {
+      /* Build a dependence between stack passed arguments and call. */
+      ASSERT(baseRegister != nullptr, "baseRegister shouldn't be null here");
+      if (baseRegister->GetRegisterNumber() == RSP) {
+        AddDependence(*lastCallInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeControl);
+      }
+    }
+    stackDefs.push_back(&insn);
+  } else {
+    /* Heap memory
+     * Build anti dependences.
+     */
+    AddDependence4InsnInVectorByType(heapUses, insn, kDependenceTypeAnti);
+    /* Build output dependence. */
+    AddDependence4InsnInVectorByType(heapDefs, insn, kDependenceTypeOutput);
+    heapDefs.push_back(&insn);
+  }
+  if (memBarInsn != nullptr) {
+    AddDependence(*memBarInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeMembar);
+  }
+  /* Memory definitions can not be moved across may-throw insns. */
+  AddDependence4InsnInVectorByType(mayThrows, insn, kDependenceTypeThrow);
+}
+
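// A minimal standalone sketch (not part of the patch) of the overlap question that
// NeedBuildDepsMem answers above: a dependence edge is required unless two stack
// accesses provably do not overlap. The struct and the [offset, offset + size) byte-range
// model are assumptions for illustration, not the types used by the backend.
#include <cstdint>

struct StackAccess {
  int32_t offset;  // byte offset from the frame/stack pointer
  int32_t size;    // access width in bytes
};

constexpr bool MayOverlap(const StackAccess &a, const StackAccess &b) {
  return !(a.offset + a.size <= b.offset || b.offset + b.size <= a.offset);
}

static_assert(MayOverlap({16, 8}, {20, 4}), "overlapping slots require a dependence edge");
static_assert(!MayOverlap({16, 8}, {24, 8}), "disjoint slots need no edge");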
+/* Build dependences of memory barrier instructions. */
+void AArch64DepAnalysis::BuildDepsMemBar(Insn &insn) {
+  AddDependence4InsnInVectorByTypeAndCmp(stackUses, insn, kDependenceTypeMembar);
+  AddDependence4InsnInVectorByTypeAndCmp(heapUses, insn, kDependenceTypeMembar);
+  AddDependence4InsnInVectorByTypeAndCmp(stackDefs, insn, kDependenceTypeMembar);
+  AddDependence4InsnInVectorByTypeAndCmp(heapDefs, insn, kDependenceTypeMembar);
+  memBarInsn = &insn;
+}
+
+/* A pseudo separator node depends on all the other nodes. */
+void AArch64DepAnalysis::BuildDepsSeparator(DepNode &newSepNode, MapleVector<DepNode*> &nodes) {
+  uint32 nextSepIndex = (separatorIndex + kMaxDependenceNum) < nodes.size()
+                        ? (separatorIndex + kMaxDependenceNum)
+                        : nodes.size() - 1;
+  for (uint32 i = separatorIndex; i < nextSepIndex; ++i) {
+    AddDependence(*nodes[i], newSepNode, kDependenceTypeSeparator);
+  }
+}
+
+
+/* Build control dependence for branch/ret instructions. */
+void AArch64DepAnalysis::BuildDepsControlAll(DepNode &depNode, const MapleVector<DepNode*> &nodes) {
+  for (uint32 i = separatorIndex; i < depNode.GetIndex(); ++i) {
+    AddDependence(*nodes[i], depNode, kDependenceTypeControl);
+  }
+}
+
+/*
+ * Build dependences of call instructions.
+ * Caller-saved physical registers will be defined by a call instruction.
+ * The condition register may also be modified by a call.
+ */
+void AArch64DepAnalysis::BuildCallerSavedDeps(Insn &insn) {
+  /* Build anti dependence and output dependence. */
+  for (uint32 i = R0; i <= R7; ++i) {
+    BuildDepsDefReg(insn, i);
+  }
+  for (uint32 i = V0; i <= V7; ++i) {
+    BuildDepsDefReg(insn, i);
+  }
+  if (!beforeRA) {
+    for (uint32 i = R8; i <= R18; ++i) {
+      BuildDepsDefReg(insn, i);
+    }
+    for (uint32 i = R29; i <= RSP; ++i) {
+      BuildDepsUseReg(insn, i);
+    }
+    for (uint32 i = V16; i <= V31; ++i) {
+      BuildDepsDefReg(insn, i);
+    }
+  }
+  /* For condition operand, such as NE, EQ, and so on. */
+  if (cgFunc.GetRflag() != nullptr) {
+    BuildDepsDefReg(insn, kRFLAG);
+  }
+}
+
+/*
+ * Build dependence between the control register and the last call instruction.
+ * insn : the instruction with a control register operand.
+ * isDest : if the control register operand is a destination operand.
+ */ +void AArch64DepAnalysis::BuildDepsBetweenControlRegAndCall(Insn &insn, bool isDest) { + if (lastCallInsn == nullptr) { + return; + } + if (isDest) { + AddDependence(*lastCallInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeOutput); + return; + } + AddDependence(*lastCallInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeAnti); +} + +/* + * Build dependence between stack-define-instruction that deal with call-insn's args and a call-instruction. + * insn : a call instruction (call/tail-call) + */ +void AArch64DepAnalysis::BuildStackPassArgsDeps(Insn &insn) { + for (auto stackDefInsn : stackDefs) { + if (stackDefInsn->IsCall()) { + continue; + } + Operand *opnd = stackDefInsn->GetMemOpnd(); + ASSERT(opnd->IsMemoryAccessOperand(), "make sure opnd is memOpnd"); + MemOperand *memOpnd = static_cast(opnd); + RegOperand *baseReg = memOpnd->GetBaseRegister(); + if ((baseReg != nullptr) && (baseReg->GetRegisterNumber() == RSP)) { + AddDependence(*stackDefInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeControl); + } + } +} + +/* Some insns may dirty all stack memory, such as "bl MCC_InitializeLocalStackRef". */ +void AArch64DepAnalysis::BuildDepsDirtyStack(Insn &insn) { + /* Build anti dependences. */ + AddDependence4InsnInVectorByType(stackUses, insn, kDependenceTypeAnti); + /* Build output depnedence. */ + AddDependence4InsnInVectorByType(stackDefs, insn, kDependenceTypeOutput); + stackDefs.push_back(&insn); +} + +/* Some call insns may use all stack memory, such as "bl MCC_CleanupLocalStackRef_NaiveRCFast". */ +void AArch64DepAnalysis::BuildDepsUseStack(Insn &insn) { + /* Build true dependences. */ + AddDependence4InsnInVectorByType(stackDefs, insn, kDependenceTypeTrue); +} + +/* Some insns may dirty all heap memory, such as a call insn. */ +void AArch64DepAnalysis::BuildDepsDirtyHeap(Insn &insn) { + /* Build anti dependences. */ + AddDependence4InsnInVectorByType(heapUses, insn, kDependenceTypeAnti); + /* Build output depnedence. */ + AddDependence4InsnInVectorByType(heapDefs, insn, kDependenceTypeOutput); + if (memBarInsn != nullptr) { + AddDependence(*memBarInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeMembar); + } + heapDefs.push_back(&insn); +} + +/* Build a pseudo node to seperate dependence graph. */ +DepNode *AArch64DepAnalysis::BuildSeparatorNode() { + Insn &pseudoSepInsn = cgFunc.GetCG()->BuildInstruction(MOP_pseudo_dependence_seperator); + DepNode *separatorNode = memPool.New(pseudoSepInsn, alloc); + separatorNode->SetType(kNodeTypeSeparator); + if (beforeRA) { + RegPressure *regPressure = memPool.New(alloc); + separatorNode->SetRegPressure(*regPressure); + separatorNode->SetPressure(*memPool.NewArray(RegPressure::GetMaxRegClassNum())); + } + return separatorNode; +} + +/* Init depAnalysis data struction */ +void AArch64DepAnalysis::Init(BB &bb, MapleVector &nodes) { + ClearAllDepData(); + lastComments.clear(); + /* Analysis live-in registers in catch BB. */ + AnalysisAmbiInsns(bb); + /* Clear all dependence nodes and push the first separator node. */ + nodes.clear(); + nodes.push_back(BuildSeparatorNode()); + separatorIndex = 0; +} + +/* When a separator build, it is the same as a new basic block. 
*/ +void AArch64DepAnalysis::ClearAllDepData() { + uint32 maxRegNum; + if (beforeRA) { + maxRegNum = cgFunc.GetMaxVReg(); + } else { + maxRegNum = kAllRegNum; + } + errno_t ret = memset_s(regDefs, sizeof(Insn*) * maxRegNum, 0, sizeof(Insn*) * maxRegNum); + CHECK_FATAL(ret == EOK, "call memset_s failed in Unit"); + ret = memset_s(regUses, sizeof(RegList*) * maxRegNum, 0, sizeof(RegList*) * maxRegNum); + CHECK_FATAL(ret == EOK, "call memset_s failed in Unit"); + memBarInsn = nullptr; + lastCallInsn = nullptr; + lastFrameDef = nullptr; + + stackUses.clear(); + stackDefs.clear(); + heapUses.clear(); + heapDefs.clear(); + mayThrows.clear(); + ambiInsns.clear(); +} + +/* Analysis live-in registers in catch bb and cleanup bb. */ +void AArch64DepAnalysis::AnalysisAmbiInsns(BB &bb) { + hasAmbiRegs = false; + if (bb.GetEhSuccs().empty()) { + return; + } + + /* Union all catch bb */ + for (auto succBB : bb.GetEhSuccs()) { + const MapleSet &liveInRegSet = succBB->GetLiveInRegNO(); + set_union(liveInRegSet.begin(), liveInRegSet.end(), + ehInRegs.begin(), ehInRegs.end(), + inserter(ehInRegs, ehInRegs.begin())); + } + + /* Union cleanup entry bb. */ + const MapleSet ®NOSet = cgFunc.GetCleanupEntryBB()->GetLiveInRegNO(); + std::set_union(regNOSet.begin(), regNOSet.end(), + ehInRegs.begin(), ehInRegs.end(), + inserter(ehInRegs, ehInRegs.begin())); + + /* Subtract R0 and R1, that is defined by eh runtime. */ + ehInRegs.erase(R0); + ehInRegs.erase(R1); + if (ehInRegs.empty()) { + return; + } + hasAmbiRegs = true; +} + +/* Check if regNO is in ehInRegs. */ +bool AArch64DepAnalysis::IfInAmbiRegs(regno_t regNO) const { + if (!hasAmbiRegs) { + return false; + } + if (ehInRegs.find(regNO) != ehInRegs.end()) { + return true; + } + return false; +} + +/* + * Build dependences of memory operand. + * insn : a instruction with the memory access operand. + * opnd : the memory access operand. + * regProp : operand property of the memory access operandess operand. + */ +void AArch64DepAnalysis::BuildMemOpndDependency(Insn &insn, Operand &opnd, const AArch64OpndProp ®Prop) { + ASSERT(opnd.IsMemoryAccessOperand(), "opnd must be memory Operand"); + AArch64MemOperand *memOpnd = static_cast(&opnd); + RegOperand *baseRegister = memOpnd->GetBaseRegister(); + if (baseRegister != nullptr) { + regno_t regNO = baseRegister->GetRegisterNumber(); + BuildDepsUseReg(insn, regNO); + if ((memOpnd->GetAddrMode() == AArch64MemOperand::kAddrModeBOi) && + (memOpnd->IsPostIndexed() || memOpnd->IsPreIndexed())) { + /* Base operand has changed. 
*/ + BuildDepsDefReg(insn, regNO); + } + } + RegOperand *indexRegister = memOpnd->GetIndexRegister(); + if (indexRegister != nullptr) { + regno_t regNO = indexRegister->GetRegisterNumber(); + BuildDepsUseReg(insn, regNO); + } + if (regProp.IsUse()) { + BuildDepsUseMem(insn, *memOpnd); + } else { + BuildDepsDefMem(insn, *memOpnd); + BuildDepsAmbiInsn(insn); + } + if (insn.IsYieldPoint()) { + BuildDepsMemBar(insn); + BuildDepsDefReg(insn, kRFLAG); + } +} + +/* Build Dependency for each Operand of insn */ +void AArch64DepAnalysis::BuildOpndDependency(Insn &insn) { + const AArch64MD* md = &AArch64CG::kMd[static_cast(&insn)->GetMachineOpcode()]; + MOperator mOp = insn.GetMachineOpcode(); + uint32 opndNum = insn.GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn.GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + if (opnd.IsMemoryAccessOperand()) { + BuildMemOpndDependency(insn, opnd, *regProp); + } else if (opnd.IsStImmediate()) { + if (mOp != MOP_xadrpl12) { + BuildDepsAccessStImmMem(insn, false); + } + } else if (opnd.IsRegister()) { + RegOperand ®Opnd = static_cast(opnd); + regno_t regNO = regOpnd.GetRegisterNumber(); + + if (regProp->IsUse()) { + BuildDepsUseReg(insn, regNO); + } + + if (regProp->IsDef()) { + BuildDepsDefReg(insn, regNO); + } + } else if (opnd.IsConditionCode()) { + /* For condition operand, such as NE, EQ, and so on. */ + if (regProp->IsUse()) { + BuildDepsUseReg(insn, kRFLAG); + BuildDepsBetweenControlRegAndCall(insn, false); + } + + if (regProp->IsDef()) { + BuildDepsDefReg(insn, kRFLAG); + BuildDepsBetweenControlRegAndCall(insn, true); + } + } else if (opnd.IsList()) { + ListOperand &listOpnd = static_cast(opnd); + /* Build true dependences */ + for (auto lst : listOpnd.GetOperands()) { + regno_t regNO = lst->GetRegisterNumber(); + BuildDepsUseReg(insn, regNO); + } + } + } +} + +/* + * Build dependences in some special issue (stack/heap/throw/clinit/lazy binding/control flow). + * insn : a instruction. + * depNode : insn's depNode. + * nodes : the dependence nodes inclue insn's depNode. + */ +void AArch64DepAnalysis::BuildSpecialInsnDependency(Insn &insn, DepNode &depNode, const MapleVector &nodes) { + const AArch64MD *md = &AArch64CG::kMd[static_cast(&insn)->GetMachineOpcode()]; + MOperator mOp = insn.GetMachineOpcode(); + if (insn.IsCall() || insn.IsTailCall()) { + /* Caller saved registers. */ + BuildCallerSavedDeps(insn); + BuildStackPassArgsDeps(insn); + + if (mOp == MOP_xbl) { + FuncNameOperand &target = static_cast(insn.GetOperand(0)); + if ((target.GetName() == "MCC_InitializeLocalStackRef") || + (target.GetName() == "MCC_ClearLocalStackRef") || + (target.GetName() == "MCC_DecRefResetPair")) { + /* Write stack memory. */ + BuildDepsDirtyStack(insn); + } else if ((target.GetName() == "MCC_CleanupLocalStackRef_NaiveRCFast") || + (target.GetName() == "MCC_CleanupLocalStackRefSkip_NaiveRCFast") || + (target.GetName() == "MCC_CleanupLocalStackRefSkip")) { + /* UseStackMemory. 
*/ + BuildDepsUseStack(insn); + } + } + BuildDepsDirtyHeap(insn); + BuildDepsAmbiInsn(insn); + if (lastCallInsn != nullptr) { + AddDependence(*lastCallInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeControl); + } + lastCallInsn = &insn; + } else if (insn.IsClinit() || insn.IsLazyLoad()) { + BuildDepsDirtyHeap(insn); + BuildDepsDefReg(insn, kRFLAG); + if (!insn.IsAdrpLdr()) { + BuildDepsDefReg(insn, R16); + BuildDepsDefReg(insn, R17); + } + } else if ((mOp == MOP_xret) || md->IsBranch()) { + BuildDepsControlAll(depNode, nodes); + } else if (insn.IsMemAccessBar()) { + BuildDepsMemBar(insn); + } else if (insn.IsSpecialIntrinsic()) { + BuildDepsDirtyHeap(insn); + } +} + +/* + * If the instruction's number of current basic block more than kMaxDependenceNum, + * then insert some pseudo separator node to split baic block. + */ +void AArch64DepAnalysis::SeperateDependenceGraph(MapleVector &nodes, uint32 &nodeSum) { + if ((nodeSum > 0) && ((nodeSum % kMaxDependenceNum) == 0)) { + ASSERT(nodeSum == nodes.size(), "CG internal error, nodeSum should equal to nodes.size."); + /* Add a pseudo node to seperate dependence graph. */ + DepNode *separatorNode = BuildSeparatorNode(); + separatorNode->SetIndex(nodeSum); + nodes.push_back(separatorNode); + BuildDepsSeparator(*separatorNode, nodes); + ClearAllDepData(); + separatorIndex = nodeSum++; + } +} + +/* + * Generate a depNode, + * insn : create depNode for the instruction. + * nodes : a vector to store depNode. + * nodeSum : the new depNode's index. + * comments : those comment insn between last no-comment's insn and insn. + */ +DepNode *AArch64DepAnalysis::GenerateDepNode(Insn &insn, MapleVector &nodes, + int32 nodeSum, const MapleVector &comments) { + DepNode *depNode = nullptr; + Reservation *rev = mad.FindReservation(insn); + ASSERT(rev != nullptr, "rev is nullptr"); + depNode = memPool.New(insn, alloc, rev->GetUnit(), rev->GetUnitNum(), *rev); + if (beforeRA) { + RegPressure *regPressure = memPool.New(alloc); + depNode->SetRegPressure(*regPressure); + depNode->SetPressure(*memPool.NewArray(RegPressure::GetMaxRegClassNum())); + } + depNode->SetIndex(nodeSum); + nodes.push_back(depNode); + insn.SetDepNode(*depNode); + + if (!comments.empty()) { + depNode->SetComments(comments); + } + return depNode; +} + +void AArch64DepAnalysis::BuildAmbiInsnDependency(Insn &insn) { + for (const auto ®NO : defRegnos) { + if (IfInAmbiRegs(regNO)) { + BuildDepsAmbiInsn(insn); + break; + } + } +} + +void AArch64DepAnalysis::BuildMayThrowInsnDependency(Insn &insn) { + /* build dependency for maythrow insn; */ + if (insn.MayThrow()) { + BuildDepsMayThrowInsn(insn); + if (lastFrameDef != nullptr) { + AddDependence(*lastFrameDef->GetDepNode(), *insn.GetDepNode(), kDependenceTypeThrow); + } + } +} + +void AArch64DepAnalysis::UpdateRegUseAndDef(Insn &insn, DepNode &depNode) { + for (auto regNO : useRegnos) { + AppendRegUseList(insn, regNO); + if (beforeRA) { + depNode.AddUseReg(regNO); + depNode.SetRegUses(regNO, *regUses[regNO]); + } + } + for (const auto ®NO : defRegnos) { + regDefs[regNO] = &insn; + regUses[regNO] = nullptr; + if (beforeRA) { + depNode.AddDefReg(regNO); + } + } +} + +/* Update stack and heap dependency */ +void AArch64DepAnalysis::UpdateStackAndHeapDependency(DepNode &depNode, Insn &insn, const Insn &locInsn) { + if (!insn.MayThrow()) { + return; + } + depNode.SetLocInsn(locInsn); + mayThrows.push_back(&insn); + AddDependence4InsnInVectorByType(stackDefs, insn, kDependenceTypeThrow); + AddDependence4InsnInVectorByType(heapDefs, insn, 
kDependenceTypeThrow); +} + +/* + * Build dependence graph. + * 1: Build dependence nodes. + * 2: Build edges between dependence nodes. Edges are: + * 2.1) True dependences + * 2.2) Anti dependences + * 2.3) Output dependences + * 2.4) Barrier dependences + */ +void AArch64DepAnalysis::Run(BB &bb, MapleVector &nodes) { + /* Initial internal datas. */ + Init(bb, nodes); + uint32 nodeSum = 1; + MapleVector comments(alloc.Adapter()); + const Insn *locInsn = bb.GetFirstLoc(); + FOR_BB_INSNS(insn, (&bb)) { + if (!insn->IsMachineInstruction()) { + if (insn->IsImmaterialInsn()) { + if (!insn->IsComment()) { + locInsn = insn; + } else { + comments.push_back(insn); + } + } else if (insn->IsCfiInsn()) { + if (!nodes.empty()) { + nodes.back()->AddCfiInsn(*insn); + } + } + continue; + } + /* Add a pseudo node to seperate dependence graph when appropriate */ + SeperateDependenceGraph(nodes, nodeSum); + /* generate a DepNode */ + DepNode *depNode = GenerateDepNode(*insn, nodes, nodeSum, comments); + ++nodeSum; + comments.clear(); + /* Build Dependency for maythrow insn; */ + BuildMayThrowInsnDependency(*insn); + useRegnos.clear(); + defRegnos.clear(); + /* Build Dependency for each Operand of insn */ + BuildOpndDependency(*insn); + /* Build Dependency for special insn */ + BuildSpecialInsnDependency(*insn, *depNode, nodes); + /* Build Dependency for AmbiInsn if needed */ + BuildAmbiInsnDependency(*insn); + /* Update stack and heap dependency */ + UpdateStackAndHeapDependency(*depNode, *insn, *locInsn); + if (insn->IsFrameDef()) { + lastFrameDef = insn; + } + /* Seperator exists. */ + AddDependence(*nodes[separatorIndex], *insn->GetDepNode(), kDependenceTypeSeparator); + /* Update register use and register def */ + UpdateRegUseAndDef(*insn, *depNode); + } + + DepNode *separatorNode = BuildSeparatorNode(); + nodes.push_back(separatorNode); + BuildDepsSeparator(*separatorNode, nodes); + if (!comments.empty()) { + lastComments = comments; + } + comments.clear(); +} + +/* return dependence type name */ +const std::string &AArch64DepAnalysis::GetDepTypeName(DepType depType) const { + ASSERT(depType <= kDependenceTypeNone, "array boundary check failed"); + return kDepTypeName[depType]; +} +} /* namespace maplebe */ diff --git a/src/maple_be/src/cg/aarch64/aarch64_ebo.cpp b/src/maple_be/src/cg/aarch64/aarch64_ebo.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1c8efbd37ff53c39ca0a314e5dfe7ea191c5b023 --- /dev/null +++ b/src/maple_be/src/cg/aarch64/aarch64_ebo.cpp @@ -0,0 +1,897 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
+ */ +#include "aarch64_ebo.h" +#include "aarch64_cg.h" +#include "mpl_logging.h" +namespace maplebe { +using namespace maple; +#define EBO_DUMP CG_DEBUG_FUNC(cgFunc) + +bool AArch64Ebo::IsFmov(const Insn &insn) const { + return ((MOP_xvmovsr <= insn.GetMachineOpcode()) && (insn.GetMachineOpcode() <= MOP_xvmovrd)); +} + +bool AArch64Ebo::IsAdd(const Insn &insn) const { + return ((MOP_xaddrrr <= insn.GetMachineOpcode()) && (insn.GetMachineOpcode() <= MOP_ssub)); +} + +bool AArch64Ebo::IsZeroRegister(const Operand &opnd) const { + if (!opnd.IsRegister()) { + return false; + } + const AArch64RegOperand *regOpnd = static_cast(&opnd); + return regOpnd->IsZeroRegister(); +} + +bool AArch64Ebo::IsClinitCheck(const Insn &insn) const { + MOperator mOp = insn.GetMachineOpcode(); + return ((mOp == MOP_clinit) || (mOp == MOP_clinit_tail)); +} + +/* retrun true if insn is globalneeded */ +bool AArch64Ebo::IsGlobalNeeded(Insn &insn) const { + /* Calls may have side effects. */ + if (insn.IsCall()) { + return true; + } + + /* Intrinsic call should not be removed. */ + if (insn.IsSpecialIntrinsic()) { + return true; + } + + /* Clinit should not be removed. */ + if (insn.IsFixedInsn()) { + return true; + } + + /* Yieldpoints should not be removed by optimizer. */ + if (cgFunc->GetCG()->GenYieldPoint() && insn.IsYieldPoint()) { + return true; + } + + Operand *opnd = insn.GetResult(0); + if ((opnd != nullptr) && (opnd->IsConstReg() || (opnd->IsRegister() && static_cast(opnd)->IsSPOrFP()))) { + return true; + } + return false; +} + +/* in aarch64,resOp will not be def and use in the same time */ +bool AArch64Ebo::ResIsNotDefAndUse(Insn &insn) const { + (void)insn; + return true; +} + +bool AArch64Ebo::IsLastAndBranch(BB &bb, Insn &insn) const { + return (bb.GetLastInsn() == &insn) && insn.IsBranch(); +} + +const RegOperand &AArch64Ebo::GetRegOperand(const Operand &opnd) const { + CHECK_FATAL(opnd.IsRegister(), "aarch64 shoud not have regShiftOp! opnd is not register!"); + const auto &res = static_cast(opnd); + return res; +} + +/* Create infomation for local_opnd from its def insn current_insn. */ +OpndInfo *AArch64Ebo::OperandInfoDef(BB ¤tBB, Insn ¤tInsn, Operand &localOpnd) { + int32 hashVal = localOpnd.IsRegister() ? 
-1 : ComputeOpndHash(localOpnd); + OpndInfo *opndInfoPrev = GetOpndInfo(localOpnd, hashVal); + OpndInfo *opndInfo = GetNewOpndInfo(currentBB, ¤tInsn, localOpnd, hashVal); + if (localOpnd.IsMemoryAccessOperand()) { + MemOpndInfo *memInfo = static_cast(opndInfo); + MemOperand *mem = static_cast(&localOpnd); + Operand *base = mem->GetBaseRegister(); + Operand *offset = mem->GetOffset(); + if (base != nullptr && base->IsRegister()) { + memInfo->SetBaseInfo(*OperandInfoUse(currentBB, *base)); + } + if (offset != nullptr && offset->IsRegister()) { + memInfo->SetOffsetInfo(*OperandInfoUse(currentBB, *offset)); + } + } + opndInfo->same = opndInfoPrev; + if ((opndInfoPrev != nullptr)) { + opndInfoPrev->redefined = TRUE; + if (opndInfoPrev->bb == ¤tBB) { + opndInfoPrev->redefinedInBB = TRUE; + } + UpdateOpndInfo(localOpnd, *opndInfoPrev, opndInfo, hashVal); + } else { + SetOpndInfo(localOpnd, opndInfo, hashVal); + } + return opndInfo; +} + +void AArch64Ebo::DefineClinitSpecialRegisters(InsnInfo &insnInfo) { + Insn *insn = insnInfo.insn; + CHECK_FATAL(insn != nullptr, "nullptr of currInsnInfo"); + RegOperand &phyOpnd1 = a64CGFunc->GetOrCreatePhysicalRegisterOperand(R16, k64BitSize, kRegTyInt); + OpndInfo *opndInfo = OperandInfoDef(*insn->GetBB(), *insn, phyOpnd1); + opndInfo->insnInfo = &insnInfo; + + RegOperand &phyOpnd2 = a64CGFunc->GetOrCreatePhysicalRegisterOperand(R17, k64BitSize, kRegTyInt); + opndInfo = OperandInfoDef(*insn->GetBB(), *insn, phyOpnd2); + opndInfo->insnInfo = &insnInfo; +} + +void AArch64Ebo::BuildCallerSaveRegisters() { + callerSaveRegTable.clear(); + RegOperand &phyOpndR0 = a64CGFunc->GetOrCreatePhysicalRegisterOperand(R0, k64BitSize, kRegTyInt); + RegOperand &phyOpndV0 = a64CGFunc->GetOrCreatePhysicalRegisterOperand(V0, k64BitSize, kRegTyFloat); + callerSaveRegTable.push_back(&phyOpndR0); + callerSaveRegTable.push_back(&phyOpndV0); + for (uint32 i = R1; i <= R18; i++) { + RegOperand &phyOpnd = + a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyInt); + callerSaveRegTable.push_back(&phyOpnd); + } + for (uint32 i = V1; i <= V7; i++) { + RegOperand &phyOpnd = + a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyInt); + callerSaveRegTable.push_back(&phyOpnd); + } + for (uint32 i = V16; i <= V31; i++) { + RegOperand &phyOpnd = + a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyInt); + callerSaveRegTable.push_back(&phyOpnd); + } + CHECK_FATAL(callerSaveRegTable.size() < kMaxCallerSaveReg, + "number of elements in callerSaveRegTable must less then 45!"); +} + +void AArch64Ebo::DefineCallerSaveRegisters(InsnInfo &insnInfo) { + Insn *insn = insnInfo.insn; + ASSERT(insn->IsCall(), "insn should be a call insn."); + for (auto opnd : callerSaveRegTable) { + OpndInfo *opndInfo = OperandInfoDef(*insn->GetBB(), *insn, *opnd); + opndInfo->insnInfo = &insnInfo; + } +} + +void AArch64Ebo::DefineReturnUseRegister(Insn &insn) { + /* Define scalar callee save register and FP, LR. */ + for (uint32 i = R19; i <= R30; i++) { + RegOperand &phyOpnd = + a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyInt); + OperandInfoUse(*insn.GetBB(), phyOpnd); + } + + /* Define SP */ + RegOperand &phyOpndSP = + a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(RSP), k64BitSize, kRegTyInt); + OperandInfoUse(*insn.GetBB(), phyOpndSP); + + /* Define FP callee save registers. 
*/ + for (uint32 i = V8; i <= V15; i++) { + RegOperand &phyOpnd = + a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyFloat); + OperandInfoUse(*insn.GetBB(), phyOpnd); + } +} + +void AArch64Ebo::DefineCallUseSpecialRegister(Insn &insn) { + /* Define FP, LR. */ + for (uint32 i = R29; i <= R30; i++) { + RegOperand &phyOpnd = + a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyInt); + OperandInfoUse(*insn.GetBB(), phyOpnd); + } + + /* Define SP */ + RegOperand &phyOpndSP = + a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(RSP), k64BitSize, kRegTyInt); + OperandInfoUse(*insn.GetBB(), phyOpndSP); +} + +/* return true if op1 == op2 */ +bool AArch64Ebo::OperandEqSpecial(const Operand &op1, const Operand &op2) const { + switch (op1.GetKind()) { + case Operand::kOpdRegister: { + const AArch64RegOperand ®1 = static_cast(op1); + const AArch64RegOperand ®2 = static_cast(op2); + return reg1 == reg2; + } + case Operand::kOpdImmediate: { + const ImmOperand &imm1 = static_cast(op1); + const ImmOperand &imm2 = static_cast(op2); + return imm1 == imm2; + } + case Operand::kOpdOffset: { + const AArch64OfstOperand &ofst1 = static_cast(op1); + const AArch64OfstOperand &ofst2 = static_cast(op2); + return ofst1 == ofst2; + } + case Operand::kOpdStImmediate: { + const StImmOperand &stImm1 = static_cast(op1); + const StImmOperand &stImm2 = static_cast(op2); + return stImm1 == stImm2; + } + case Operand::kOpdMem: { + const AArch64MemOperand &mem1 = static_cast(op1); + const AArch64MemOperand &mem2 = static_cast(op2); + if (mem1.GetAddrMode() == mem2.GetAddrMode()) { + ASSERT(mem1.GetBaseRegister() != nullptr, "nullptr check"); + ASSERT(mem2.GetBaseRegister() != nullptr, "nullptr check"); + } + return ((mem1.GetAddrMode() == mem2.GetAddrMode()) && + OperandEqual(*(mem1.GetBaseRegister()), *(mem2.GetBaseRegister())) && + OperandEqual(*(mem1.GetIndexRegister()), *(mem2.GetIndexRegister())) && + OperandEqual(*(mem1.GetOffsetOperand()), *(mem2.GetOffsetOperand())) && + (mem1.GetSymbol() == mem2.GetSymbol()) && (mem1.GetSize() == mem2.GetSize())); + } + default: { + return false; + } + } +} + +int32 AArch64Ebo::GetOffsetVal(const MemOperand &mem) const { + const AArch64MemOperand &memOpnd = static_cast(mem); + AArch64OfstOperand *offset = memOpnd.GetOffsetImmediate(); + int32 val = 0; + if (offset != nullptr) { + val += offset->GetOffsetValue(); + + if (offset->IsSymOffset() || offset->IsSymAndImmOffset()) { + val += offset->GetSymbol()->GetStIdx().Idx(); + } + } + return val; +} + +/* + * move vreg, 0 + * store vreg, mem + * ===> + * store wzr, mem + * return true if do simplify successfully. 
+ */ +bool AArch64Ebo::DoConstProp(Insn &insn, uint32 idx, Operand &opnd) { + AArch64ImmOperand *src = static_cast(&opnd); + const AArch64MD *md = &AArch64CG::kMd[(insn.GetMachineOpcode())]; + /* avoid the invalid case "cmp wzr, #0"/"add w1, wzr, #100" */ + if (src->IsZero() && insn.GetOperand(idx).IsRegister() && (insn.IsStore() || insn.IsMove() || md->IsCondDef())) { + insn.SetOperand(idx, *GetZeroOpnd(src->GetSize())); + return true; + } + MOperator mopCode = insn.GetMachineOpcode(); + switch (mopCode) { + case MOP_xmovrr: + case MOP_wmovrr: { + ASSERT(idx == kInsnSecondOpnd, "src const for move must be the second operand."); + uint32 targetSize = insn.GetOperand(idx).GetSize(); + if (src->GetSize() != targetSize) { + src = static_cast(src->Clone(*cgFunc->GetMemoryPool())); + CHECK_FATAL(src != nullptr, "pointer result is null"); + src->SetSize(targetSize); + } + if (src->IsSingleInstructionMovable()) { + if (EBO_DUMP) { + LogInfo::MapleLogger() << " Do constprop:Prop constval " << src->GetValue() << "into insn:\n"; + insn.Dump(); + } + insn.SetOperand(kInsnSecondOpnd, *src); + MOperator mOp = (mopCode == MOP_wmovrr) ? MOP_xmovri32 : MOP_xmovri64; + insn.SetMOperator(mOp); + if (EBO_DUMP) { + LogInfo::MapleLogger() << " after constprop the insn is:\n"; + insn.Dump(); + } + return true; + } + break; + } + case MOP_xaddrrr: + case MOP_waddrrr: + case MOP_xsubrrr: + case MOP_wsubrrr: { + if ((idx != kInsnThirdOpnd) || !src->IsInBitSize(kMaxAarch64ImmVal24Bits) || + !(src->IsInBitSize(kMaxAarch64ImmVal12Bits) || + src->IsInBitSize(kMaxAarch64ImmVal12Bits, kMaxAarch64ImmVal12Bits))) { + return false; + } + Operand &result = insn.GetOperand(0); + bool is64Bits = (result.GetSize() == k64BitSize); + if (EBO_DUMP) { + LogInfo::MapleLogger() << " Do constprop:Prop constval " << src->GetValue() << "into insn:\n"; + insn.Dump(); + } + if (src->IsZero()) { + MOperator mOp = is64Bits ? MOP_xmovrr : MOP_wmovrr; + insn.SetMOP(mOp); + insn.PopBackOperand(); + if (EBO_DUMP) { + LogInfo::MapleLogger() << " after constprop the insn is:\n"; + insn.Dump(); + } + return true; + } + insn.SetOperand(kInsnThirdOpnd, *src); + if ((mopCode == MOP_xaddrrr) || (mopCode == MOP_waddrrr)) { + is64Bits ? insn.SetMOperator(MOP_xaddrri12) : insn.SetMOperator(MOP_waddrri12); + } else if ((mopCode == MOP_xsubrrr) || (mopCode == MOP_wsubrrr)) { + is64Bits ? 
insn.SetMOperator(MOP_xsubrri12) : insn.SetMOperator(MOP_wsubrri12); + } + if (EBO_DUMP) { + LogInfo::MapleLogger() << " after constprop the insn is:\n"; + insn.Dump(); + } + return true; + } + default: + break; + } + return false; +} + +/* Constant folding */ +bool AArch64Ebo::DoConstantFold(Insn &insn, const MapleVector &opnds) { + MOperator opCode = insn.GetMachineOpcode(); + + if (insn.GetOpndNum() == 0) { + return false; + } + + Operand *res = insn.GetResult(0); + + ASSERT(res != nullptr, "expect a register"); + ASSERT(res->IsRegister(), "expect a register"); + /* only do integers */ + RegOperand *reg = static_cast(res); + if ((res == nullptr) || (!reg->IsOfIntClass())) { + return false; + } + /* csel ->cset */ + if ((opCode == MOP_wcselrrrc) || (opCode == MOP_xcselrrrc)) { + Operand *op0 = opnds.at(kInsnSecondOpnd); + Operand *op1 = opnds.at(kInsnThirdOpnd); + AArch64ImmOperand *imm0 = nullptr; + AArch64ImmOperand *imm1 = nullptr; + if (op0->IsImmediate()) { + imm0 = static_cast(op0); + } + if (op1->IsImmediate()) { + imm1 = static_cast(op1); + } + + bool reverse = (imm1 != nullptr) && imm1->IsOne() && + (((imm0 != nullptr) && imm0->IsZero()) || op0->IsZeroRegister()); + if (((imm0 != nullptr) && imm0->IsOne() && (((imm1 != nullptr) && imm1->IsZero()) || op1->IsZeroRegister())) || + reverse) { + if (EBO_DUMP) { + LogInfo::MapleLogger() << "change csel insn :\n"; + insn.Dump(); + } + Operand *result = insn.GetResult(0); + Operand &condOperand = insn.GetOperand(kInsnFourthOpnd); + if (!reverse) { + Insn &newInsn = cgFunc->GetCG()->BuildInstruction( + (opCode == MOP_xcselrrrc) ? MOP_xcsetrc : MOP_wcsetrc, *result, condOperand); + insn.GetBB()->ReplaceInsn(insn, newInsn); + if (EBO_DUMP) { + LogInfo::MapleLogger() << "to cset insn ====>\n"; + newInsn.Dump(); + } + } else { + auto &cond = static_cast(condOperand); + if (!CheckCondCode(cond)) { + return false; + } + CondOperand &reverseCond = a64CGFunc->GetCondOperand(GetReverseCond(cond)); + Insn &newInsn = cgFunc->GetCG()->BuildInstruction( + (opCode == MOP_xcselrrrc) ? MOP_xcsetrc : MOP_wcsetrc, *result, reverseCond); + insn.GetBB()->ReplaceInsn(insn, newInsn); + if (EBO_DUMP) { + LogInfo::MapleLogger() << "to cset insn ====>\n"; + newInsn.Dump(); + } + } + return true; + } + } + return false; +} + +/* Look at an exression that has a constant second operand and attempt to simplify the computations. 
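+ * Typical rewrites performed below (operands are illustrative):
+ *   orr  w0, w1, #0          ->  mov w0, w1
+ *   add  x0, x1, x2          ->  mov x0, x1         (x2 known to be #0)
+ *   add  x0, x1, x2          ->  add x0, x1, #imm   (x2 known constant, imm encodable in 12 bits
+ *                                                    or in 12 bits shifted left by 12)
+ *   add  x2, x1, #imm1
+ *   add  x0, x2, #imm2       ->  add x0, x1, #(imm1 + imm2)   (when the sum is still encodable)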
*/ +bool AArch64Ebo::ConstantOperand(Insn &insn, const MapleVector &opnds, + const MapleVector &opndInfo) { + BB *bb = insn.GetBB(); + bool result = false; + if (insn.GetOpndNum() < 1) { + return false; + } + ASSERT(opnds.size() > 1, "opnds size must greater than 1"); + Operand *op0 = opnds[kInsnSecondOpnd]; + Operand *op1 = opnds[kInsnThirdOpnd]; + Operand *res = insn.GetResult(0); + CHECK_FATAL(res != nullptr, "null ptr check"); + const AArch64MD *md = &AArch64CG::kMd[static_cast(&insn)->GetMachineOpcode()]; + uint32 opndSize = md->GetOperandSize(); + bool first = op0->IsConstant() && !op1->IsConstant(); + ASSERT((op1->IsConstant() && !op0->IsConstant()) || (op0->IsConstant() && !op1->IsConstant()), + "op0 or op1 must be constant op"); + AArch64ImmOperand *immOpnd = nullptr; + Operand *op = nullptr; + int32 idx0 = kInsnSecondOpnd; + if (first) { + immOpnd = static_cast(op0); + op = op1; + if (op->IsMemoryAccessOperand()) { + op = &(insn.GetOperand(kInsnThirdOpnd)); + } + idx0 = kInsnThirdOpnd; + } else { + immOpnd = static_cast(op1); + op = op0; + if (op->IsMemoryAccessOperand()) { + op = &(insn.GetOperand(kInsnSecondOpnd)); + } + } + /* For orr insn and one of the opnd is zero */ + if (((insn.GetMachineOpcode() == MOP_wiorrri12) || (insn.GetMachineOpcode() == MOP_xiorrri13) || + (insn.GetMachineOpcode() == MOP_xiorri13r) || (insn.GetMachineOpcode() == MOP_wiorri12r)) && + immOpnd->IsZero()) { + MOperator mOp = opndSize == k64BitSize ? MOP_xmovrr : MOP_wmovrr; + Insn &newInsn = cgFunc->GetCG()->BuildInstruction(mOp, *res, *op); + bb->ReplaceInsn(insn, newInsn); + return true; + } + /* For the imm is 0. Then replace the insn by a move insn. */ + if (((MOP_xaddrrr <= insn.GetMachineOpcode()) && (insn.GetMachineOpcode() <= MOP_sadd) && immOpnd->IsZero()) || + (!first && (MOP_xsubrrr <= insn.GetMachineOpcode()) && (insn.GetMachineOpcode() <= MOP_ssub) && + immOpnd->IsZero())) { + Insn &newInsn = cgFunc->GetCG()->BuildInstruction(opndSize == k64BitSize ? MOP_xmovrr : MOP_wmovrr, + *res, *op); + bb->ReplaceInsn(insn, newInsn); + return true; + } + + if ((insn.GetMachineOpcode() == MOP_xaddrrr) || (insn.GetMachineOpcode() == MOP_waddrrr)) { + if (immOpnd->IsInBitSize(kMaxAarch64ImmVal24Bits)) { + /* + * ADD Wd|WSP, Wn|WSP, #imm{, shift} ; 32-bit general registers + * ADD Xd|SP, Xn|SP, #imm{, shift} ; 64-bit general registers + * imm : 0 ~ 4095, shift: none, LSL #0, or LSL #12 + * aarch64 assembly takes up to 24-bits, if the lower 12 bits is all 0 + */ + if ((immOpnd->IsInBitSize(kMaxAarch64ImmVal12Bits) || + immOpnd->IsInBitSize(kMaxAarch64ImmVal12Bits, kMaxAarch64ImmVal12Bits))) { + MOperator mOp = opndSize == k64BitSize ? MOP_xaddrri12 : MOP_waddrri12; + Insn &newInsn = cgFunc->GetCG()->BuildInstruction(mOp, *res, *op, *immOpnd); + bb->ReplaceInsn(insn, newInsn); + result = true; + } + } + } + /* Look for the sequence which can be simpified. */ + if (result || (insn.GetMachineOpcode() == MOP_xaddrri12) || (insn.GetMachineOpcode() == MOP_waddrri12)) { + Insn *prev = opndInfo[idx0]->insn; + if ((prev != nullptr) && ((prev->GetMachineOpcode() == MOP_xaddrri12) || + (prev->GetMachineOpcode() == MOP_waddrri12))) { + OpndInfo *prevInfo0 = opndInfo[idx0]->insnInfo->origOpnd[kInsnSecondOpnd]; + /* if prevop0 has been redefined. skip this optimiztation. 
*/ + if (prevInfo0->redefined) { + return result; + } + Operand &prevOpnd0 = prev->GetOperand(kInsnSecondOpnd); + AArch64ImmOperand &imm0 = static_cast(prev->GetOperand(kInsnThirdOpnd)); + int64_t val = imm0.GetValue() + immOpnd->GetValue(); + AArch64ImmOperand &imm1 = a64CGFunc->CreateImmOperand(val, opndSize, imm0.IsSignedValue()); + if (imm1.IsInBitSize(kMaxAarch64ImmVal24Bits) && (imm1.IsInBitSize(kMaxAarch64ImmVal12Bits) || + imm1.IsInBitSize(kMaxAarch64ImmVal12Bits, kMaxAarch64ImmVal12Bits))) { + MOperator mOp = (opndSize == k64BitSize ? MOP_xaddrri12 : MOP_waddrri12); + bb->ReplaceInsn(insn, cgFunc->GetCG()->BuildInstruction(mOp, *res, prevOpnd0, imm1)); + result = true; + } + } + } + return result; +} + +AArch64CC_t AArch64Ebo::GetReverseCond(const CondOperand &cond) const { + switch (cond.GetCode()) { + case CC_NE: + return CC_EQ; + case CC_EQ: + return CC_NE; + case CC_LT: + return CC_GE; + case CC_GE: + return CC_LT; + case CC_GT: + return CC_LE; + case CC_LE: + return CC_GT; + default: + CHECK_FATAL(0, "Not support yet."); + } + return kCcLast; +} + +/* return true if cond == CC_LE */ +bool AArch64Ebo::CheckCondCode(const CondOperand &cond) const { + switch (cond.GetCode()) { + case CC_NE: + case CC_EQ: + case CC_LT: + case CC_GE: + case CC_GT: + case CC_LE: + return true; + default: + return false; + } +} + +/* Do some special pattern */ +bool AArch64Ebo::SpecialSequence(Insn &insn, const MapleVector &origInfos) { + MOperator opCode = insn.GetMachineOpcode(); + AArch64CGFunc *aarchFunc = static_cast(cgFunc); + switch (opCode) { + /* + * mov R503, R0 + * mov R0, R503 + * ==> mov R0, R0 + */ + case MOP_wmovrr: + case MOP_xmovrr: { + OpndInfo *opndInfo = origInfos[kInsnSecondOpnd]; + if (opndInfo == nullptr) { + return false; + } + Insn *prevInsn = opndInfo->insn; + if ((prevInsn != nullptr) && (prevInsn->GetMachineOpcode() == opCode) && + (prevInsn == insn.GetPreviousMachineInsn()) && + !RegistersIdentical(prevInsn->GetOperand(kInsnFirstOpnd), prevInsn->GetOperand(kInsnSecondOpnd)) && + !RegistersIdentical(insn.GetOperand(kInsnFirstOpnd), insn.GetOperand(kInsnSecondOpnd))) { + Operand *reg1 = insn.GetResult(0); + Operand ®2 = prevInsn->GetOperand(kInsnSecondOpnd); + Insn &newInsn = cgFunc->GetCG()->BuildInstruction(insn.GetMachineOpcode(), *reg1, reg2); + insn.GetBB()->ReplaceInsn(insn, newInsn); + return true; + } + break; + } + /* + * lsl x1, x1, #3 + * add x0, x0, x1 + * ===> add x0, x0, x1, 3({MOP_xaddrrrs, + * {MOPD_Reg64ID,MOPD_Reg64IS,MOPD_Reg64IS,MOPD_BitShift64,MOPD_Undef},0,"add","0,1,2,3", 1, 3}) + */ + case MOP_xaddrrr: + case MOP_waddrrr: { + if (insn.GetResult(0) == nullptr) { + return false; + } + bool is64bits = (insn.GetResult(0)->GetSize() == k64BitSize); + Operand &op0 = insn.GetOperand(kInsnSecondOpnd); + OpndInfo *opndInfo = origInfos.at(kInsnThirdOpnd); + if ((opndInfo != nullptr) && (opndInfo->insn != nullptr)) { + Insn *insn1 = opndInfo->insn; + InsnInfo *insnInfo1 = opndInfo->insnInfo; + CHECK_NULL_FATAL(insnInfo1); + MOperator opc1 = insn1->GetMachineOpcode(); + if ((opc1 == MOP_xlslrri6) || (opc1 == MOP_wlslrri5)) { + /* don't use register if it was redefined. 
*/ + OpndInfo *opndInfo1 = insnInfo1->origOpnd[kInsnSecondOpnd]; + if ((opndInfo1 != nullptr) && opndInfo1->redefined) { + return false; + } + Operand &res = insn.GetOperand(kInsnFirstOpnd); + Operand &opnd1 = insn1->GetOperand(kInsnSecondOpnd); + auto &immOpnd = static_cast(insn1->GetOperand(kInsnThirdOpnd)); + uint32 xLslrriBitLen = 6; + uint32 wLslrriBitLen = 5; + Operand &shiftOpnd = aarchFunc->CreateBitShiftOperand( + BitShiftOperand::kLSL, immOpnd.GetValue(), (opCode == MOP_xlslrri6) ? xLslrriBitLen : wLslrriBitLen); + MOperator mOp = (is64bits ? MOP_xaddrrrs : MOP_waddrrrs); + insn.GetBB()->ReplaceInsn(insn, cgFunc->GetCG()->BuildInstruction(mOp, res, op0, + opnd1, shiftOpnd)); + return true; + } + } + break; + } + case MOP_wstr: + case MOP_xstr: + case MOP_wldr: + case MOP_xldr: { + /* + * add x2, x1, imm + * ldr x3, [x2] + * -> ldr x3, [x1, imm] + * --------------------- + * add x2, x1, imm + * str x3, [x2] + * -> str x3, [x1, imm] + */ + CHECK_NULL_FATAL(insn.GetResult(0)); + OpndInfo *opndInfo = origInfos[kInsnSecondOpnd]; + if (insn.IsLoad() && opndInfo == nullptr) { + return false; + } + const AArch64MD *md = &AArch64CG::kMd[static_cast(&insn)->GetMachineOpcode()]; + bool is64bits = md->Is64Bit(); + uint32 size = md->GetOperandSize(); + OpndInfo *baseInfo = nullptr; + MemOperand *memOpnd = nullptr; + if (insn.IsLoad()) { + MemOpndInfo *memInfo = static_cast(opndInfo); + baseInfo = memInfo->GetBaseInfo(); + memOpnd = static_cast(memInfo->opnd); + } else { + Operand *res = insn.GetResult(0); + ASSERT(res->IsMemoryAccessOperand(), "res must be MemoryAccessOperand"); + memOpnd = static_cast(res); + Operand *base = memOpnd->GetBaseRegister(); + ASSERT(base->IsRegister(), "base must be Register"); + baseInfo = GetOpndInfo(*base, -1); + } + + if (static_cast(memOpnd)->GetAddrMode() != AArch64MemOperand::kAddrModeBOi) { + return false; + } + + if ((baseInfo != nullptr) && (baseInfo->insn != nullptr)) { + Insn *insn1 = baseInfo->insn; + InsnInfo *insnInfo1 = baseInfo->insnInfo; + CHECK_NULL_FATAL(insnInfo1); + MOperator opc1 = insn1->GetMachineOpcode(); + if ((opc1 == MOP_xaddrri12) || (opc1 == MOP_waddrri12)) { + if (memOpnd->GetOffset() == nullptr) { + return false; + } + AArch64ImmOperand *imm0 = static_cast(memOpnd->GetOffset()); + if (imm0 == nullptr) { + return false; + } + int64 imm0Val = imm0->GetValue(); + Operand &res = insn.GetOperand(kInsnFirstOpnd); + RegOperand *op1 = &static_cast(insn1->GetOperand(kInsnSecondOpnd)); + AArch64ImmOperand &imm1 = static_cast(insn1->GetOperand(kInsnThirdOpnd)); + int64 immVal; + /* don't use register if it was redefined. 
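+         * When it has been redefined, only the paired pattern handled below (an
+         * "add ..., #imm, LSL #12" feeding the current add) can still be folded, because
+         * both immediates are simply accumulated into the final memory offset; any other
+         * redefinition leaves no usable base value and the rewrite is abandoned.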
*/ + OpndInfo *opndInfo1 = insnInfo1->origOpnd[kInsnSecondOpnd]; + if ((opndInfo1 != nullptr) && opndInfo1->redefined) { + /* + * add x2, x1, imm0, LSL imm1 + * add x2, x2, imm2 + * ldr x3, [x2] + * -> ldr x3, [x1, imm] + * ---------------------------- + * add x2, x1, imm0, LSL imm1 + * add x2, x2, imm2 + * str x3, [x2] + * -> str x3, [x1, imm] + */ + Insn *insn2 = opndInfo1->insn; + if (insn2 == nullptr) { + return false; + } + MOperator opCode2 = insn2->GetMachineOpcode(); + if ((opCode2 != MOP_xaddrri24) && (opCode2 != MOP_waddrri24)) { + return false; + } + auto &res2 = static_cast(insn2->GetOperand(kInsnFirstOpnd)); + auto &base2 = static_cast(insn2->GetOperand(kInsnSecondOpnd)); + auto &immOpnd2 = static_cast(insn2->GetOperand(kInsnThirdOpnd)); + auto &res1 = static_cast(insn1->GetOperand(kInsnFirstOpnd)); + if (RegistersIdentical(res1, *op1) && RegistersIdentical(res1, res2) && + (GetOpndInfo(base2, -1) != nullptr) && !GetOpndInfo(base2, -1)->redefined) { + immVal = + imm0Val + imm1.GetValue() + (static_cast(immOpnd2.GetValue()) << kMaxAarch64ImmVal12Bits); + op1 = &base2; + } else { + return false; + } + } else { + immVal = imm0Val + imm1.GetValue(); + } + + /* multiple of 4 and 8 */ + const int multiOfFour = 4; + const int multiOfEight = 8; + if ((!is64bits && (immVal < kStrLdrImm32UpperBound) && (immVal % multiOfFour == 0)) || + (is64bits && (immVal < kStrLdrImm64UpperBound) && (immVal % multiOfEight == 0))) { + MemOperand &mo = aarchFunc->CreateMemOpnd(*op1, immVal, size); + Insn &ldrInsn = cgFunc->GetCG()->BuildInstruction(opCode, res, mo); + insn.GetBB()->ReplaceInsn(insn, ldrInsn); + return true; + } + } + } + break; + } /* end case MOP_xldr */ + case MOP_xcsetrc: + case MOP_wcsetrc: { + /* i. cmp x0, x1 + * cset w0, EQ ===> cmp x0, x1 + * cmp w0, #0 cset w0, EQ + * cset w0, NE + * + * ii. cmp x0, x1 + * cset w0, EQ ===> cmp x0, x1 + * cmp w0, #0 cset w0, NE + * cset w0, EQ + * + * a.< -1 : 0x20ff25e0 > < 0 > cmp(226) (opnd0: vreg:C105 class: [CC]) (opnd1: vreg:R104 class: [I]) (opnd2: + * vreg:R106 class: [I]) + * b.< -1 : 0x20ff60a0 > < 0 > cset(72) (opnd0: vreg:R101 class: [I]) (opnd1: CC: EQ) + * c.< -1* : 0x20ff3870 > < 0 > cmp(223) (opnd0: vreg:C105 class: [CC]) (opnd1: vreg:R101 class: [I]) (opnd2: + * imm:0) + * d.< * -1 : 0x20ff3908 > < 0 > cset(72) (opnd0: vreg:R107 class: [I]) (opnd1: CC: NE) + * d1.< -1 : 0x20ff3908 > < 0 > * cset(72) (opnd0: vreg:R107 class: [I]) (opnd1: CC: EQ) i, d + * ===> mov R107 R101 ii, a,b,c,d1 ===> a,b,cset Rxx + * NE, c, mov R107 Rxx + */ + auto &cond = static_cast(insn.GetOperand(kInsnSecondOpnd)); + if ((cond.GetCode() != CC_NE) && (cond.GetCode() != CC_EQ)) { + return false; + } + bool reverse = (cond.GetCode() == CC_EQ); + OpndInfo *condInfo = origInfos[kInsnSecondOpnd]; + if ((condInfo != nullptr) && condInfo->insn) { + Insn *cmp1 = condInfo->insn; + if ((cmp1->GetMachineOpcode() == MOP_xcmpri) || (cmp1->GetMachineOpcode() == MOP_wcmpri)) { + InsnInfo *cmpInfo1 = condInfo->insnInfo; + CHECK_FATAL(cmpInfo1 != nullptr, "pointor cmpInfo1 is null"); + OpndInfo *info0 = cmpInfo1->origOpnd[kInsnSecondOpnd]; + /* if R101 was not redefined. 
*/ + if ((info0 != nullptr) && (info0->insnInfo != nullptr) && (info0->insn != nullptr) && + (reverse || !info0->redefined) && cmp1->GetOperand(kInsnThirdOpnd).IsImmediate()) { + Insn *csetInsn = info0->insn; + MOperator opc1 = csetInsn->GetMachineOpcode(); + if (((opc1 == MOP_xcsetrc) || (opc1 == MOP_wcsetrc)) && + static_cast(cmp1->GetOperand(kInsnThirdOpnd)).IsZero()) { + CondOperand &cond1 = static_cast(csetInsn->GetOperand(kInsnSecondOpnd)); + if (!CheckCondCode(cond1)) { + return false; + } + if (EBO_DUMP) { + LogInfo::MapleLogger() << "< === do specical condition optimization, replace insn ===> \n"; + insn.Dump(); + } + Operand *result = insn.GetResult(0); + CHECK_FATAL(result != nullptr, "pointor result is null"); + uint32 size = result->GetSize(); + if (reverse) { + /* After regalloction, we can't create a new register. */ + if (!beforeRegAlloc) { + return false; + } + AArch64CGFunc *aarFunc = static_cast(cgFunc); + Operand &r = aarFunc->CreateRegisterOperandOfType(static_cast(result)->GetRegisterType(), + size / kBitsPerByte); + /* after generate a new vreg, check if the size of DataInfo is big enough */ + EnlargeSpaceForLA(*csetInsn); + CondOperand &cond2 = aarFunc->GetCondOperand(GetReverseCond(cond1)); + Insn &newCset = cgFunc->GetCG()->BuildInstruction( + result->GetSize() == k64BitSize ? MOP_xcsetrc : MOP_wcsetrc, r, cond2); + /* new_cset use the same cond as cset_insn. */ + IncRef(*info0->insnInfo->origOpnd[kInsnSecondOpnd]); + csetInsn->GetBB()->InsertInsnAfter(*csetInsn, newCset); + MOperator mOp = (result->GetSize() == k64BitSize ? MOP_xmovrr : MOP_wmovrr); + Insn &newInsn = cgFunc->GetCG()->BuildInstruction(mOp, *result, r); + insn.GetBB()->ReplaceInsn(insn, newInsn); + if (EBO_DUMP) { + LogInfo::MapleLogger() << "< === with new insn ===> \n"; + newInsn.Dump(); + } + } else { + Operand *result1 = csetInsn->GetResult(0); + MOperator mOp = ((result->GetSize() == k64BitSize) ? MOP_xmovrr : MOP_wmovrr); + Insn &newInsn = cgFunc->GetCG()->BuildInstruction(mOp, *result, *result1); + insn.GetBB()->ReplaceInsn(insn, newInsn); + if (EBO_DUMP) { + LogInfo::MapleLogger() << "< === with new insn ===> \n"; + newInsn.Dump(); + } + } + return true; + } + } + } + } + } /* end case MOP_wcsetrc */ + [[clang::fallthrough]]; + default: + break; + } + return false; +} + +/* + * *iii. 
mov w16, v10.s[1] // FMOV from simd 105 ---> replace_insn + * mov w1, w16 ----->insn + * ==> + * mov w1, v10.s[1] + */ +bool AArch64Ebo::IsMovToSIMDVmov(Insn &insn, const Insn &replaceInsn) const { + if (insn.GetMachineOpcode() == MOP_wmovrr && replaceInsn.GetMachineOpcode() == MOP_xvmovrv) { + insn.SetMOperator(replaceInsn.GetMachineOpcode()); + return true; + } + return false; +} + +bool AArch64Ebo::ChangeLdrMop(Insn &insn, const Operand &opnd) const { + ASSERT(insn.IsLoad(), "expect insn is load in ChangeLdrMop"); + ASSERT(opnd.IsRegister(), "expect opnd is a register in ChangeLdrMop"); + + const RegOperand *regOpnd = static_cast(&opnd); + ASSERT(static_cast(insn.GetOperand(kInsnFirstOpnd)).GetRegisterType() != regOpnd->GetRegisterType(), + "expected matched register type in AArch64Ebo::ChangeLdrMop"); + if (static_cast(insn.GetOperand(kInsnSecondOpnd)).GetIndexRegister()) { + return false; + } + + bool bRet = true; + if (regOpnd->GetRegisterType() == kRegTyFloat) { + switch (insn.GetMachineOpcode()) { + case MOP_wldrb: + insn.SetMOperator(MOP_bldr); + break; + case MOP_wldrh: + insn.SetMOperator(MOP_hldr); + break; + case MOP_wldr: + insn.SetMOperator(MOP_sldr); + break; + case MOP_xldr: + insn.SetMOperator(MOP_dldr); + break; + case MOP_wldli: + insn.SetMOperator(MOP_sldli); + break; + case MOP_xldli: + insn.SetMOperator(MOP_dldli); + break; + case MOP_wldrsb: + case MOP_wldrsh: + default: + bRet = false; + break; + } + } else if (regOpnd->GetRegisterType() == kRegTyInt) { + switch (insn.GetMachineOpcode()) { + case MOP_bldr: + insn.SetMOperator(MOP_wldrb); + break; + case MOP_hldr: + insn.SetMOperator(MOP_wldrh); + break; + case MOP_sldr: + insn.SetMOperator(MOP_wldr); + break; + case MOP_dldr: + insn.SetMOperator(MOP_xldr); + break; + case MOP_sldli: + insn.SetMOperator(MOP_wldli); + break; + case MOP_dldli: + insn.SetMOperator(MOP_xldli); + break; + default: + bRet = false; + break; + } + } else { + ASSERT(false, "Internal error."); + } + return bRet; +} +} /* namespace maplebe */ diff --git a/src/maple_be/src/cg/aarch64/aarch64_fixshortbranch.cpp b/src/maple_be/src/cg/aarch64/aarch64_fixshortbranch.cpp new file mode 100644 index 0000000000000000000000000000000000000000..23972a79cd10d7f9f58ce06a9c1f92d5912fbee9 --- /dev/null +++ b/src/maple_be/src/cg/aarch64/aarch64_fixshortbranch.cpp @@ -0,0 +1,138 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#include "aarch64_fixshortbranch.h" +#include "cg.h" +#include "mpl_logging.h" +#include "common_utils.h" + +namespace maplebe { +/* + * Check the distance between the first insn of BB with the lable(targ_labidx) + * and the insn with targ_id. If the distance greater than kShortBRDistance + * return false. 
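+ * The distance is measured in the instruction ids assigned by SetInsnId() (roughly one id
+ * per emitted machine instruction), which approximates byte distance since every AArch64
+ * instruction is 4 bytes; tbz/tbnz can only reach targets within +/-32KB.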
+ */ +bool AArch64FixShortBranch::DistanceCheck(const BB &bb, LabelIdx targLabIdx, uint32 targId) { + for (auto *tBB : bb.GetSuccs()) { + if (tBB->GetLabIdx() != targLabIdx) { + continue; + } + Insn *tInsn = tBB->GetFirstInsn(); + while (tInsn == nullptr || !tInsn->IsMachineInstruction()) { + if (tInsn == nullptr) { + tBB = tBB->GetNext(); + tInsn = tBB->GetFirstInsn(); + } else { + tInsn = tInsn->GetNext(); + } + } + uint32 tmp = (tInsn->GetId() > targId) ? (tInsn->GetId() - targId) : (targId - tInsn->GetId()); + return (tmp < kShortBRDistance); + } + CHECK_FATAL(false, "CFG error"); +} + +void AArch64FixShortBranch::SetInsnId(){ + uint32 i = 0; + AArch64CGFunc *aarch64CGFunc = static_cast(cgFunc); + FOR_ALL_BB(bb, aarch64CGFunc) { + FOR_BB_INSNS(insn, bb) { + if (!insn->IsMachineInstruction()) { + continue; + } + i += insn->GetAtomicNum(); + insn->SetId(i); + if (insn->GetMachineOpcode() == MOP_adrp_ldr && CGOptions::IsLazyBinding() && !cgFunc->GetCG()->IsLibcore()) { + /* For 1 additional EmitLazyBindingRoutine in lazybinding + * see function AArch64Insn::Emit in file aarch64_insn.cpp + */ + ++i; + } + } + } +} + +/* + * TBZ/TBNZ instruction is generated under -O2, these branch instructions only have a range of +/-32KB. + * If the branch target is not reachable, we split tbz/tbnz into combination of ubfx and cbz/cbnz, which + * will clobber one extra register. With LSRA under -O2, we can use one of the reserved registers R16 for + * that purpose. To save compile time, we do this change when there are more than 32KB / 4 instructions + * in the function. + */ +void AArch64FixShortBranch::FixShortBranches() { + AArch64CGFunc *aarch64CGFunc = static_cast(cgFunc); + SetInsnId(); + FOR_ALL_BB(bb, aarch64CGFunc) { + /* Do a backward scan searching for short branches */ + FOR_BB_INSNS_REV(insn, bb) { + if (!insn->IsMachineInstruction()) { + continue; + } + MOperator thisMop = insn->GetMachineOpcode(); + if (thisMop != MOP_wtbz && thisMop != MOP_wtbnz && thisMop != MOP_xtbz && thisMop != MOP_xtbnz) { + continue; + } + LabelOperand &label = static_cast(insn->GetOperand(kInsnThirdOpnd)); + /* should not be commented out after bug fix */ + if (DistanceCheck(*bb, label.GetLabelIndex(), insn->GetId())) { + continue; + } + auto ® = static_cast(insn->GetOperand(kInsnFirstOpnd)); + ImmOperand &bitSize = aarch64CGFunc->CreateImmOperand(1, k8BitSize, false); + auto &bitPos = static_cast(insn->GetOperand(kInsnSecondOpnd)); + MOperator ubfxOp = MOP_undef; + MOperator cbOp = MOP_undef; + switch (thisMop) { + case MOP_wtbz: + ubfxOp = MOP_wubfxrri5i5; + cbOp = MOP_wcbz; + break; + case MOP_wtbnz: + ubfxOp = MOP_wubfxrri5i5; + cbOp = MOP_wcbnz; + break; + case MOP_xtbz: + ubfxOp = MOP_xubfxrri6i6; + cbOp = MOP_xcbz; + break; + case MOP_xtbnz: + ubfxOp = MOP_xubfxrri6i6; + cbOp = MOP_xcbnz; + break; + default: + break; + } + AArch64RegOperand &tmp = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(R16, (ubfxOp == MOP_wubfxrri5i5) ? 
k32BitSize : k64BitSize, + kRegTyInt); + (void)bb->InsertInsnAfter(*insn, cg->BuildInstruction(cbOp, tmp, label)); + (void)bb->InsertInsnAfter(*insn, cg->BuildInstruction(ubfxOp, tmp, reg, bitPos, bitSize)); + bb->RemoveInsn(*insn); + break; + } + } +} + +AnalysisResult *CgFixShortBranch::Run(CGFunc *cgFunc, CgFuncResultMgr *cgFuncResultMgr) { + (void)cgFuncResultMgr; + ASSERT(cgFunc != nullptr, "nullptr check"); + MemPool *memPool = memPoolCtrler.NewMemPool("fixShortBranches"); + auto *fixShortBranch = memPool->New(cgFunc); + CHECK_FATAL(fixShortBranch != nullptr, "AArch64FixShortBranch instance create failure"); + fixShortBranch->FixShortBranches(); + memPoolCtrler.DeleteMemPool(memPool); + return nullptr; +} +} /* namespace maplebe */ + diff --git a/src/maple_be/src/cg/aarch64/aarch64_global.cpp b/src/maple_be/src/cg/aarch64/aarch64_global.cpp new file mode 100644 index 0000000000000000000000000000000000000000..94840512258340ffbb89f0e83e8ee205adabe0c3 --- /dev/null +++ b/src/maple_be/src/cg/aarch64/aarch64_global.cpp @@ -0,0 +1,971 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#include "aarch64_global.h" +#include "aarch64_reaching.h" +#include "aarch64_cg.h" +#include "aarch64_live.h" + +namespace maplebe { +using namespace maple; + +void AArch64GlobalOpt::Run() { + if (cgFunc.NumBBs() > kMaxBBNum || cgFunc.GetRD()->GetMaxInsnNO() > kMaxInsnNum) { + return; + } + OptimizeManager optManager(cgFunc); + optManager.Optimize(); + optManager.Optimize(); + optManager.Optimize(); + optManager.Optimize(); + optManager.Optimize(); + optManager.Optimize(); +} + +/* if used Operand in insn is defined by zero in all define insn, return true */ +bool OptimizePattern::OpndDefByZero(Insn &insn, int32 useIdx) const { + ASSERT(insn.GetOperand(useIdx).IsRegister(), "the used Operand must be Register"); + /* Zero Register don't need be defined */ + if (insn.GetOperand(useIdx).IsZeroRegister()) { + return true; + } + + InsnSet defInsns = cgFunc.GetRD()->FindDefForRegOpnd(insn, useIdx); + ASSERT(!defInsns.empty(), "operand must be defined before used"); + for (auto &defInsn : defInsns) { + if (!InsnDefZero(*defInsn)) { + return false; + } + } + return true; +} + +/* if used Operand in insn is defined by one in all define insn, return true */ +bool OptimizePattern::OpndDefByOne(Insn &insn, int32 useIdx) const { + ASSERT(insn.GetOperand(useIdx).IsRegister(), "the used Operand must be Register"); + /* Zero Register don't need be defined */ + if (insn.GetOperand(useIdx).IsZeroRegister()) { + return false; + } + InsnSet defInsns = cgFunc.GetRD()->FindDefForRegOpnd(insn, useIdx); + ASSERT(!defInsns.empty(), "operand must be defined before used"); + for (auto &defInsn : defInsns) { + if (!InsnDefOne(*defInsn)) { + return false; + } + } + return true; +} + +/* if used Operand in insn is defined by one valid bit in all define insn, return true */ +bool OptimizePattern::OpndDefByOneOrZero(Insn &insn, int32 useIdx) const { + if (insn.GetOperand(useIdx).IsZeroRegister()) { + return true; 
+ } + + InsnSet defInsnSet = cgFunc.GetRD()->FindDefForRegOpnd(insn, useIdx); + ASSERT(!defInsnSet.empty(), "Operand must be defined before used"); + + for (auto &defInsn : defInsnSet) { + if (!InsnDefOneOrZero(*defInsn)) { + return false; + } + } + return true; +} + +/* if defined operand(must be first insn currently) in insn is const one, return true */ +bool OptimizePattern::InsnDefOne(Insn &insn) { + MOperator defMop = insn.GetMachineOpcode(); + switch (defMop) { + case MOP_xmovri32: + case MOP_xmovri64: { + Operand &srcOpnd = insn.GetOperand(1); + ASSERT(srcOpnd.IsIntImmediate(), "expects ImmOperand"); + ImmOperand &srcConst = static_cast(srcOpnd); + int64 srcConstValue = srcConst.GetValue(); + if (srcConstValue == 1) { + return true; + } + return false; + } + default: + return false; + } +} + +/* if defined operand(must be first insn currently) in insn is const zero, return true */ +bool OptimizePattern::InsnDefZero(Insn &insn) { + MOperator defMop = insn.GetMachineOpcode(); + switch (defMop) { + case MOP_xmovri32: + case MOP_xmovri64: { + Operand &srcOpnd = insn.GetOperand(kInsnSecondOpnd); + ASSERT(srcOpnd.IsIntImmediate(), "expects ImmOperand"); + ImmOperand &srcConst = static_cast(srcOpnd); + int64 srcConstValue = srcConst.GetValue(); + if (srcConstValue == 0) { + return true; + } + return false; + } + case MOP_xmovrr: + case MOP_wmovrr: + return insn.GetOperand(kInsnSecondOpnd).IsZeroRegister(); + default: + return false; + } +} + +/* if defined operand(must be first insn currently) in insn has only one valid bit, return true */ +bool OptimizePattern::InsnDefOneOrZero(Insn &insn) { + MOperator defMop = insn.GetMachineOpcode(); + switch (defMop) { + case MOP_wcsetrc: + case MOP_xcsetrc: + return true; + case MOP_xmovri32: + case MOP_xmovri64: { + Operand &defOpnd = insn.GetOperand(kInsnSecondOpnd); + ASSERT(defOpnd.IsIntImmediate(), "expects ImmOperand"); + auto &defConst = static_cast(defOpnd); + int64 defConstValue = defConst.GetValue(); + if (defConstValue != 0 && defConstValue != 1) { + return false; + } else { + return true; + } + } + case MOP_xmovrr: + case MOP_wmovrr: { + return insn.GetOperand(kInsnSecondOpnd).IsZeroRegister(); + } + case MOP_wlsrrri5: + case MOP_xlsrrri6: { + Operand &opnd2 = insn.GetOperand(kInsnThirdOpnd); + ASSERT(opnd2.IsIntImmediate(), "expects ImmOperand"); + ImmOperand &opndImm = static_cast(opnd2); + int64 shiftBits = opndImm.GetValue(); + if (((defMop == MOP_wlsrrri5) && (shiftBits == k32BitSize - 1)) || + ((defMop == MOP_xlsrrri6) && (shiftBits == k64BitSize - 1))) { + return true; + } else { + return false; + } + } + default: + return false; + } +} + +void OptimizePattern::ReplaceAllUsedOpndWithNewOpnd(const InsnSet &useInsnSet, uint32 regNO, + Operand &newOpnd, bool updateInfo) const { + for (auto useInsn : useInsnSet) { + const AArch64MD *md = &AArch64CG::kMd[static_cast(useInsn)->GetMachineOpcode()]; + uint32 opndNum = useInsn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = useInsn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + if (!regProp->IsRegUse() && !opnd.IsMemoryAccessOperand()) { + continue; + } + + if (opnd.IsRegister() && (static_cast(opnd).GetRegisterNumber() == regNO)) { + useInsn->SetOperand(i, newOpnd); + if (updateInfo) { + cgFunc.GetRD()->InitGenUse(*useInsn->GetBB(), false); + } + } else if (opnd.IsMemoryAccessOperand()) { + AArch64MemOperand &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + RegOperand *index = 
memOpnd.GetIndexRegister(); + MemOperand *newMem = nullptr; + if (base != nullptr && (base->GetRegisterNumber() == regNO)) { + newMem = static_cast(opnd.Clone(*cgFunc.GetMemoryPool())); + CHECK_FATAL(newMem != nullptr, "null ptr check"); + newMem->SetBaseRegister(*static_cast(&newOpnd)); + useInsn->SetOperand(i, *newMem); + if (updateInfo) { + cgFunc.GetRD()->InitGenUse(*useInsn->GetBB(), false); + } + } + if (index != nullptr && (index->GetRegisterNumber() == regNO)) { + newMem = static_cast(opnd.Clone(*cgFunc.GetMemoryPool())); + CHECK_FATAL(newMem != nullptr, "null ptr check"); + newMem->SetIndexRegister(*static_cast(&newOpnd)); + useInsn->SetOperand(i, *newMem); + if (updateInfo) { + cgFunc.GetRD()->InitGenUse(*useInsn->GetBB(), false); + } + } + } + } + } +} + +bool ForwardPropPattern::CheckCondition(Insn &insn) { + if (!insn.IsMachineInstruction()) { + return false; + } + if ((insn.GetMachineOpcode() != MOP_xmovrr) && (insn.GetMachineOpcode() != MOP_wmovrr)) { + return false; + } + Operand &firstOpnd = insn.GetOperand(kInsnFirstOpnd); + Operand &secondOpnd = insn.GetOperand(kInsnSecondOpnd); + RegOperand &firstRegOpnd = static_cast(firstOpnd); + RegOperand &secondRegOpnd = static_cast(secondOpnd); + uint32 firstRegNO = firstRegOpnd.GetRegisterNumber(); + uint32 secondRegNO = secondRegOpnd.GetRegisterNumber(); + if (firstRegOpnd.IsZeroRegister() || !firstRegOpnd.IsVirtualRegister() || !secondRegOpnd.IsVirtualRegister()) { + return false; + } + firstRegUseInsnSet = cgFunc.GetRD()->FindUseForRegOpnd(insn, firstRegNO, true); + if (firstRegUseInsnSet.empty()) { + return false; + } + InsnSet secondRegDefInsnSet = cgFunc.GetRD()->FindDefForRegOpnd(insn, secondRegNO, true); + if (secondRegDefInsnSet.size() != 1 || RegOperand::IsSameReg(firstOpnd, secondOpnd)) { + return false; + } + bool toDoOpt = true; + for (auto useInsn : firstRegUseInsnSet) { + if (!cgFunc.GetRD()->RegIsLiveBetweenInsn(secondRegNO, insn, *useInsn)) { + toDoOpt = false; + break; + } + InsnSet defInsnSet = cgFunc.GetRD()->FindDefForRegOpnd(*useInsn, firstRegNO, true); + if (defInsnSet.size() > 1) { + toDoOpt = false; + break; + } + } + return toDoOpt; +} + +void ForwardPropPattern::Optimize(Insn &insn) { + Operand &firstOpnd = insn.GetOperand(kInsnFirstOpnd); + Operand &secondOpnd = insn.GetOperand(kInsnSecondOpnd); + RegOperand &firstRegOpnd = static_cast(firstOpnd); + uint32 firstRegNO = firstRegOpnd.GetRegisterNumber(); + + for (auto *useInsn : firstRegUseInsnSet) { + const AArch64MD *md = &AArch64CG::kMd[static_cast(useInsn)->GetMachineOpcode()]; + uint32 opndNum = useInsn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = useInsn->GetOperand(i); + const AArch64OpndProp *regProp = md->GetOperand(i); + if (!regProp->IsRegUse() && !opnd.IsMemoryAccessOperand()) { + continue; + } + + if (opnd.IsRegister() && (static_cast(opnd).GetRegisterNumber() == firstRegNO)) { + useInsn->SetOperand(i, secondOpnd); + if (((useInsn->GetMachineOpcode() == MOP_xmovrr) || (useInsn->GetMachineOpcode() == MOP_wmovrr)) && + (static_cast(useInsn->GetOperand(kInsnSecondOpnd)).IsVirtualRegister()) && + (static_cast(useInsn->GetOperand(kInsnFirstOpnd)).IsVirtualRegister())) { + modifiedBB.insert(useInsn->GetBB()); + } + cgFunc.GetRD()->InitGenUse(*useInsn->GetBB(), false); + } else if (opnd.IsMemoryAccessOperand()) { + AArch64MemOperand &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + RegOperand *index = memOpnd.GetIndexRegister(); + MemOperand *newMem = nullptr; + if (base != nullptr && 
(base->GetRegisterNumber() == firstRegNO)) { + newMem = static_cast(opnd.Clone(*cgFunc.GetMemoryPool())); + CHECK_FATAL(newMem != nullptr, "null ptr check"); + newMem->SetBaseRegister(static_cast(secondOpnd)); + useInsn->SetOperand(i, *newMem); + cgFunc.GetRD()->InitGenUse(*useInsn->GetBB(), false); + } + if ((index != nullptr) && (index->GetRegisterNumber() == firstRegNO)) { + newMem = static_cast(opnd.Clone(*cgFunc.GetMemoryPool())); + CHECK_FATAL(newMem != nullptr, "null ptr check"); + newMem->SetIndexRegister(static_cast(secondOpnd)); + useInsn->SetOperand(i, *newMem); + cgFunc.GetRD()->InitGenUse(*useInsn->GetBB(), false); + } + } + } + } + insn.SetOperand(0, secondOpnd); + cgFunc.GetRD()->UpdateInOut(*insn.GetBB(), true); +} + +void ForwardPropPattern::Init() { + firstRegUseInsnSet.clear(); +} + +void ForwardPropPattern::Run() { + bool secondTime = false; + do { + FOR_ALL_BB(bb, &cgFunc) { + if (bb->IsUnreachable() || (secondTime && modifiedBB.find(bb) == modifiedBB.end())) { + continue; + } + + if (secondTime) { + modifiedBB.erase(bb); + } + + FOR_BB_INSNS(insn, bb) { + Init(); + if (!CheckCondition(*insn)) { + continue; + } + Optimize(*insn); + } + } + secondTime = true; + } while (!modifiedBB.empty()); +} + +bool BackPropPattern::CheckAndGetOpnd(Insn &insn) { + if (!insn.IsMachineInstruction()) { + return false; + } + if ((insn.GetMachineOpcode() != MOP_xmovrr) && (insn.GetMachineOpcode() != MOP_wmovrr)) { + return false; + } + Operand &firstOpnd = insn.GetOperand(kInsnFirstOpnd); + Operand &secondOpnd = insn.GetOperand(kInsnSecondOpnd); + if (RegOperand::IsSameReg(firstOpnd, secondOpnd)) { + return false; + } + + firstRegOpnd = &static_cast(firstOpnd); + secondRegOpnd = &static_cast(secondOpnd); + if (firstRegOpnd->IsZeroRegister() || !secondRegOpnd->IsVirtualRegister()) { + return false; + } + firstRegNO = firstRegOpnd->GetRegisterNumber(); + secondRegNO = secondRegOpnd->GetRegisterNumber(); + return true; +} + +bool BackPropPattern::DestOpndHasUseInsns(Insn &insn) { + BB &bb = *insn.GetBB(); + InsnSet useInsnSetOfFirstOpnd; + bool findRes = cgFunc.GetRD()->FindRegUseBetweenInsn(firstRegNO, insn.GetNext(), + bb.GetLastInsn(), useInsnSetOfFirstOpnd); + if ((findRes && useInsnSetOfFirstOpnd.empty()) || + (!findRes && useInsnSetOfFirstOpnd.empty() && !bb.GetLiveOut()->TestBit(firstRegNO))) { + return false; + } + return true; +} + +bool BackPropPattern::DestOpndLiveOutToEHSuccs(Insn &insn) { + BB &bb = *insn.GetBB(); + for (auto ehSucc : bb.GetEhSuccs()) { + if (ehSucc->GetLiveIn()->TestBit(firstRegNO)) { + return true; + } + } + return false; +} + +bool BackPropPattern::CheckSrcOpndDefAndUseInsns(Insn &insn) { + BB &bb = *insn.GetBB(); + /* secondOpnd is defined in other BB */ + std::vector defInsnVec = cgFunc.GetRD()->FindRegDefBetweenInsn(secondRegNO, bb.GetFirstInsn(), insn.GetPrev()); + if (defInsnVec.size() != 1) { + return false; + } + defInsnForSecondOpnd = defInsnVec.back(); + /* part defined */ + if ((defInsnForSecondOpnd->GetMachineOpcode() == MOP_xmovkri16) || + (defInsnForSecondOpnd->GetMachineOpcode() == MOP_wmovkri16)) { + return false; + } + bool findFinish = cgFunc.GetRD()->FindRegUseBetweenInsn(secondRegNO, defInsnForSecondOpnd->GetNext(), + bb.GetLastInsn(), srcOpndUseInsnSet); + if (!findFinish && bb.GetLiveOut()->TestBit(secondRegNO)) { + return false; + } + return true; +} + +bool BackPropPattern::CheckPredefineInsn(Insn &insn) { + if (insn.GetPrev() == defInsnForSecondOpnd) { + return true; + } + std::vector preDefInsnForFirstOpndVec; + BB &bb = 
*insn.GetBB(); + if (cgFunc.GetRD()->CheckRegGen(bb, firstRegNO)) { + preDefInsnForFirstOpndVec = + cgFunc.GetRD()->FindRegDefBetweenInsn(firstRegNO, defInsnForSecondOpnd->GetNext(), insn.GetPrev()); + } + if (!preDefInsnForFirstOpndVec.empty()) { + return false; + } + /* there is no predefine insn in current bb */ + InsnSet useInsnSetForFirstOpnd; + cgFunc.GetRD()->FindRegUseBetweenInsn(firstRegNO, defInsnForSecondOpnd->GetNext(), insn.GetPrev(), + useInsnSetForFirstOpnd); + if (!useInsnSetForFirstOpnd.empty()) { + return false; + } + return true; +} + +bool BackPropPattern::CheckRedefineInsn(Insn &insn) { + for (auto useInsn : srcOpndUseInsnSet) { + if ((useInsn->GetId() > insn.GetId()) && (insn.GetNext() != useInsn) && + !cgFunc.GetRD()->FindRegDefBetweenInsn(firstRegNO, insn.GetNext(), useInsn->GetPrev()).empty()) { + return false; + } + } + return true; +} + +bool BackPropPattern::CheckCondition(Insn &insn) { + if (!CheckAndGetOpnd(insn)) { + return false; + } + if (!DestOpndHasUseInsns(insn)) { + return false; + } + /* first register must not be live out to eh_succs */ + if (DestOpndLiveOutToEHSuccs(insn)) { + return false; + } + if (!CheckSrcOpndDefAndUseInsns(insn)) { + return false; + } + /* check predefine insn */ + if (!CheckPredefineInsn(insn)) { + return false; + } + /* check redefine insn */ + if (!CheckRedefineInsn(insn)) { + return false; + } + return true; +} + +void BackPropPattern::Optimize(Insn &insn) { + Operand &firstOpnd = insn.GetOperand(kInsnFirstOpnd); + ReplaceAllUsedOpndWithNewOpnd(srcOpndUseInsnSet, secondRegNO, firstOpnd, false); + /* replace define insn */ + const AArch64MD *md = &AArch64CG::kMd[static_cast(defInsnForSecondOpnd)->GetMachineOpcode()]; + uint32 opndNum = defInsnForSecondOpnd->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = defInsnForSecondOpnd->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + if (!regProp->IsRegDef() && !opnd.IsMemoryAccessOperand()) { + continue; + } + + if (opnd.IsRegister() && (static_cast(opnd).GetRegisterNumber() == secondRegNO)) { + defInsnForSecondOpnd->SetOperand(i, firstOpnd); + } else if (opnd.IsMemoryAccessOperand()) { + AArch64MemOperand &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + if (base != nullptr && memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi && + (memOpnd.IsPostIndexed() || memOpnd.IsPreIndexed()) && base->GetRegisterNumber() == secondRegNO) { + MemOperand *newMem = static_cast(opnd.Clone(*cgFunc.GetMemoryPool())); + CHECK_FATAL(newMem != nullptr, "null ptr check"); + newMem->SetBaseRegister(static_cast(firstOpnd)); + defInsnForSecondOpnd->SetOperand(i, *newMem); + } + } + } + insn.GetBB()->RemoveInsn(insn); +} + +void BackPropPattern::Init() { + firstRegOpnd = nullptr; + secondRegOpnd = nullptr; + firstRegNO = 0; + secondRegNO = 0; + srcOpndUseInsnSet.clear(); + defInsnForSecondOpnd = nullptr; +} + +void BackPropPattern::Run() { + bool secondTime = false; + std::set modifiedBB; + do { + FOR_ALL_BB(bb, &cgFunc) { + if (bb->IsUnreachable() || (secondTime && modifiedBB.find(bb) == modifiedBB.end())) { + continue; + } + + if (secondTime) { + modifiedBB.erase(bb); + } + + FOR_BB_INSNS_REV(insn, bb) { + Init(); + if (!CheckCondition(*insn)) { + continue; + } + modifiedBB.insert(bb); + Optimize(*insn); + } + cgFunc.GetRD()->UpdateInOut(*bb); + } + secondTime = true; + } while (!modifiedBB.empty()); +} + +bool CmpCsetPattern::CheckCondition(Insn &insn) { + nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == 
nullptr || !insn.IsMachineInstruction()) { + return false; + } + + MOperator firstMop = insn.GetMachineOpcode(); + MOperator secondMop = nextInsn->GetMachineOpcode(); + if (!(((firstMop == MOP_wcmpri) || (firstMop == MOP_xcmpri)) && + ((secondMop == MOP_wcsetrc) || (secondMop == MOP_xcsetrc)))) { + return false; + } + + /* get cmp_first operand */ + cmpFirstOpnd = &(insn.GetOperand(kInsnSecondOpnd)); + /* get cmp second Operand, ImmOperand must be 0 or 1 */ + cmpSecondOpnd = &(insn.GetOperand(kInsnThirdOpnd)); + ASSERT(cmpSecondOpnd->IsIntImmediate(), "expects ImmOperand"); + ImmOperand *cmpConstOpnd = static_cast(cmpSecondOpnd); + cmpConstVal = cmpConstOpnd->GetValue(); + /* get cset first Operand */ + csetFirstOpnd = &(nextInsn->GetOperand(kInsnFirstOpnd)); + if (((cmpConstVal != 0) && (cmpConstVal != 1)) || (cmpFirstOpnd->GetSize() != csetFirstOpnd->GetSize()) || + !OpndDefByOneOrZero(insn, 1)) { + return false; + } + + InsnSet useInsnSet = cgFunc.GetRD()->FindUseForRegOpnd(insn, 0, false); + if (useInsnSet.size() > 1) { + return false; + } + return true; +} + +void CmpCsetPattern::Optimize(Insn &insn) { + Insn *csetInsn = nextInsn; + BB &bb = *insn.GetBB(); + nextInsn = nextInsn->GetNextMachineInsn(); + /* get condition Operand */ + CondOperand &cond = static_cast(csetInsn->GetOperand(kInsnSecondOpnd)); + if (((cmpConstVal == 0) && (cond.GetCode() == CC_NE)) || ((cmpConstVal == 1) && (cond.GetCode() == CC_EQ))) { + if (RegOperand::IsSameReg(*cmpFirstOpnd, *csetFirstOpnd)) { + bb.RemoveInsn(insn); + bb.RemoveInsn(*csetInsn); + } else { + MOperator mopCode = (cmpFirstOpnd->GetSize() == k64BitSize) ? MOP_xmovrr : MOP_wmovrr; + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(mopCode, *csetFirstOpnd, *cmpFirstOpnd); + newInsn.SetId(insn.GetId()); + bb.ReplaceInsn(insn, newInsn); + bb.RemoveInsn(*csetInsn); + } + } else if (((cmpConstVal == 1) && (cond.GetCode() == CC_NE)) || + ((cmpConstVal == 0) && (cond.GetCode() == CC_EQ))) { + MOperator mopCode = (cmpFirstOpnd->GetSize() == k64BitSize) ? MOP_xeorrri13 : MOP_weorrri12; + constexpr int64 eorImm = 1; + auto &aarch64CGFunc = static_cast(cgFunc); + ImmOperand &one = aarch64CGFunc.CreateImmOperand(eorImm, k8BitSize, false); + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(mopCode, *csetFirstOpnd, *cmpFirstOpnd, one); + newInsn.SetId(insn.GetId()); + bb.ReplaceInsn(insn, newInsn); + bb.RemoveInsn(*csetInsn); + } + + cgFunc.GetRD()->UpdateInOut(bb, true); +} + +void CmpCsetPattern::Init() { + cmpConstVal = 0; + cmpFirstOpnd = nullptr; + cmpSecondOpnd = nullptr; + csetFirstOpnd = nullptr; +} + +void CmpCsetPattern::Run() { + FOR_ALL_BB(bb, &cgFunc) { + FOR_BB_INSNS(insn, bb) { + Init(); + if (!CheckCondition(*insn)) { + continue; + } + Optimize(*insn); + } + } +} + +AArch64CC_t CselPattern::GetInverseCondCode(const CondOperand &cond) const { + switch (cond.GetCode()) { + case CC_NE: + return CC_EQ; + case CC_EQ: + return CC_NE; + case CC_LT: + return CC_GE; + case CC_GE: + return CC_LT; + case CC_GT: + return CC_LE; + case CC_LE: + return CC_GT; + default: + return kCcLast; + } +} + +bool CselPattern::CheckCondition(Insn &insn) { + MOperator mopCode = insn.GetMachineOpcode(); + if ((mopCode != MOP_xcselrrrc) && (mopCode != MOP_wcselrrrc)) { + return false; + } + return true; +} + +void CselPattern::Optimize(Insn &insn) { + BB &bb = *insn.GetBB(); + Operand &opnd0 = insn.GetOperand(kInsnFirstOpnd); + Operand &cond = insn.GetOperand(kInsnFourthOpnd); + MOperator newMop = ((opnd0.GetSize()) == k64BitSize ? 
MOP_xcsetrc : MOP_wcsetrc); + + if (OpndDefByOne(insn, kInsnSecondOpnd) && OpndDefByZero(insn, kInsnThirdOpnd)) { + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(newMop, opnd0, cond); + newInsn.SetId(insn.GetId()); + bb.ReplaceInsn(insn, newInsn); + cgFunc.GetRD()->InitGenUse(bb, false); + } else if (OpndDefByZero(insn, kInsnSecondOpnd) && OpndDefByOne(insn, kInsnThirdOpnd)) { + CondOperand &originCond = static_cast(cond); + AArch64CC_t inverseCondCode = GetInverseCondCode(originCond); + if (inverseCondCode == kCcLast) { + return; + } + auto &aarchCGFunc = static_cast(cgFunc); + CondOperand &inverseCond = aarchCGFunc.GetCondOperand(inverseCondCode); + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(newMop, opnd0, inverseCond); + newInsn.SetId(insn.GetId()); + bb.ReplaceInsn(insn, newInsn); + cgFunc.GetRD()->InitGenUse(bb, false); + } +} + +void CselPattern::Run() { + FOR_ALL_BB(bb, &cgFunc) { + FOR_BB_INSNS_SAFE(insn, bb, nextInsn) { + if (!CheckCondition(*insn)) { + continue; + } + Optimize(*insn); + } + } +} + +uint32 RedundantUxtPattern::GetInsnValidBit(Insn &insn) { + MOperator mOp = insn.GetMachineOpcode(); + uint32 nRet; + switch (mOp) { + case MOP_wcsetrc: + case MOP_xcsetrc: + nRet = 1; + break; + case MOP_wldrb: + case MOP_wldrsb: + case MOP_wldarb: + case MOP_wldxrb: + case MOP_wldaxrb: + nRet = k8BitSize; + break; + case MOP_wldrh: + case MOP_wldrsh: + case MOP_wldarh: + case MOP_wldxrh: + case MOP_wldaxrh: + nRet = k16BitSize; + break; + case MOP_wmovrr: + case MOP_xmovri32: + case MOP_wldli: + case MOP_wldr: + case MOP_wldp: + case MOP_wldar: + case MOP_wmovkri16: + case MOP_wmovzri16: + case MOP_wmovnri16: + case MOP_wldxr: + case MOP_wldaxr: + case MOP_wldaxp: + case MOP_wcsincrrrc: + case MOP_wcselrrrc: + case MOP_wcsinvrrrc: + nRet = k32BitSize; + break; + default: + nRet = k64BitSize; + break; + } + return nRet; +} + +uint32 RedundantUxtPattern::GetMaximumValidBit(Insn &insn, uint8 index, InsnSet &visitedInsn) const { + InsnSet defInsnSet = cgFunc.GetRD()->FindDefForRegOpnd(insn, index); + ASSERT(!defInsnSet.empty(), "operand must be defined before used"); + + uint32 validBit = 0; + uint32 nMaxValidBit = 0; + for (auto &defInsn : defInsnSet) { + if (visitedInsn.find(defInsn) != visitedInsn.end()) { + continue; + } + + visitedInsn.insert(defInsn); + MOperator mOp = defInsn->GetMachineOpcode(); + if ((mOp == MOP_wmovrr) || (mOp == MOP_xmovrr)) { + validBit = GetMaximumValidBit(*defInsn, 1, visitedInsn); + } else { + validBit = GetInsnValidBit(*defInsn); + } + + nMaxValidBit = nMaxValidBit < validBit ? validBit : nMaxValidBit; + } + return nMaxValidBit; +} + +bool RedundantUxtPattern::CheckCondition(Insn &insn) { + BB &bb = *insn.GetBB(); + InsnSet visitedInsn1; + InsnSet visitedInsn2; + if (!((insn.GetMachineOpcode() == MOP_xuxth32 && + GetMaximumValidBit(insn, kInsnSecondOpnd, visitedInsn1) <= k16BitSize) || + (insn.GetMachineOpcode() == MOP_xuxtb32 && + GetMaximumValidBit(insn, kInsnSecondOpnd, visitedInsn2) <= k8BitSize))) { + return false; + } + + Operand &firstOpnd = insn.GetOperand(kInsnFirstOpnd); + secondOpnd = &(insn.GetOperand(kInsnSecondOpnd)); + if (RegOperand::IsSameReg(firstOpnd, *secondOpnd)) { + bb.RemoveInsn(insn); + /* update in/out */ + cgFunc.GetRD()->UpdateInOut(bb, true); + return false; + } + useInsnSet = cgFunc.GetRD()->FindUseForRegOpnd(insn, 0, false); + RegOperand &firstRegOpnd = static_cast(firstOpnd); + firstRegNO = firstRegOpnd.GetRegisterNumber(); + /* for uxth R1, V501, R1 is parameter register, this can't be optimized. 
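+   * (the rewrite below deletes the extension and renames every use of its destination to the
+   *  source vreg, which is only safe when the destination itself is a virtual register)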
*/ + if (firstRegOpnd.IsPhysicalRegister()) { + return false; + } + + if (useInsnSet.empty()) { + bb.RemoveInsn(insn); + /* update in/out */ + cgFunc.GetRD()->UpdateInOut(bb, true); + return false; + } + + RegOperand *secondRegOpnd = static_cast(secondOpnd); + ASSERT(secondRegOpnd != nullptr, "secondRegOpnd should not be nullptr"); + uint32 secondRegNO = secondRegOpnd->GetRegisterNumber(); + for (auto useInsn : useInsnSet) { + InsnSet defInsnSet = cgFunc.GetRD()->FindDefForRegOpnd(*useInsn, firstRegNO, true); + if ((defInsnSet.size() > 1) || !(cgFunc.GetRD()->RegIsLiveBetweenInsn(secondRegNO, insn, *useInsn))) { + return false; + } + } + return true; +} + +void RedundantUxtPattern::Optimize(Insn &insn) { + BB &bb = *insn.GetBB(); + ReplaceAllUsedOpndWithNewOpnd(useInsnSet, firstRegNO, *secondOpnd, true); + bb.RemoveInsn(insn); + cgFunc.GetRD()->UpdateInOut(bb, true); +} + +void RedundantUxtPattern::Init() { + useInsnSet.clear(); + secondOpnd = nullptr; +} + +void RedundantUxtPattern::Run() { + FOR_ALL_BB(bb, &cgFunc) { + if (bb->IsUnreachable()) { + continue; + } + FOR_BB_INSNS_SAFE(insn, bb, nextInsn) { + Init(); + if (!CheckCondition(*insn)) { + continue; + } + Optimize(*insn); + } + } +} + +bool LocalVarSaveInsnPattern::CheckFirstInsn(Insn &firstInsn) { + MOperator mOp = firstInsn.GetMachineOpcode(); + if (mOp != MOP_xmovrr && mOp != MOP_wmovrr) { + return false; + } + firstInsnSrcOpnd = &(firstInsn.GetOperand(kInsnSecondOpnd)); + RegOperand *firstInsnSrcReg = static_cast(firstInsnSrcOpnd); + if (firstInsnSrcReg->GetRegisterNumber() != R0) { + return false; + } + firstInsnDestOpnd = &(firstInsn.GetOperand(kInsnFirstOpnd)); + RegOperand *firstInsnDestReg = static_cast(firstInsnDestOpnd); + if (firstInsnDestReg->IsPhysicalRegister()) { + return false; + } + return true; +} + +bool LocalVarSaveInsnPattern::CheckSecondInsn() { + MOperator mOp = secondInsn->GetMachineOpcode(); + if (mOp != MOP_wstr && mOp != MOP_xstr) { + return false; + } + secondInsnSrcOpnd = &(secondInsn->GetOperand(kInsnFirstOpnd)); + if (!RegOperand::IsSameReg(*firstInsnDestOpnd, *secondInsnSrcOpnd)) { + return false; + } + /* check memOperand is stack memOperand, and x0 is stored in localref var region */ + secondInsnDestOpnd = &(secondInsn->GetOperand(kInsnSecondOpnd)); + AArch64MemOperand *secondInsnDestMem = static_cast(secondInsnDestOpnd); + RegOperand *baseReg = secondInsnDestMem->GetBaseRegister(); + RegOperand *indexReg = secondInsnDestMem->GetIndexRegister(); + if ((baseReg == nullptr) || !(cgFunc.IsFrameReg(*baseReg)) || (indexReg != nullptr)) { + return false; + } + return true; +} + +bool LocalVarSaveInsnPattern::CheckAndGetUseInsn(Insn &firstInsn) { + InsnSet useInsnSet = cgFunc.GetRD()->FindUseForRegOpnd(firstInsn, kInsnFirstOpnd, false); + if (useInsnSet.size() != 2) { /* secondInsn and another useInsn */ + return false; + } + + /* useInsnSet includes secondInsn and another useInsn */ + for (auto tmpUseInsn : useInsnSet) { + if (tmpUseInsn->GetId() != secondInsn->GetId()) { + useInsn = tmpUseInsn; + break; + } + } + return true; +} + +bool LocalVarSaveInsnPattern::CheckLiveRange(Insn &firstInsn) { + uint32 maxInsnNO = cgFunc.GetRD()->GetMaxInsnNO(); + uint32 useInsnID = useInsn->GetId(); + uint32 defInsnID = firstInsn.GetId(); + uint32 distance = useInsnID > defInsnID ? 
useInsnID - defInsnID : defInsnID - useInsnID; + float liveRangeProportion = static_cast(distance) / maxInsnNO; + /* 0.3 is a balance for real optimization effect */ + if (liveRangeProportion < 0.3) { + return false; + } + return true; +} + +bool LocalVarSaveInsnPattern::CheckCondition(Insn &firstInsn) { + secondInsn = firstInsn.GetNext(); + if (secondInsn == nullptr) { + return false; + } + /* check firstInsn is : mov vreg, R0; */ + if (!CheckFirstInsn(firstInsn)) { + return false; + } + /* check the secondInsn is : str vreg, stackMem */ + if (!CheckSecondInsn()) { + return false; + } + /* find the uses of the vreg */ + if (!CheckAndGetUseInsn(firstInsn)) { + return false; + } + /* simulate live range using insn distance */ + if (!CheckLiveRange(firstInsn)) { + return false; + } + RegOperand *firstInsnDestReg = static_cast(firstInsnDestOpnd); + regno_t firstInsnDestRegNO = firstInsnDestReg->GetRegisterNumber(); + InsnSet defInsnSet = cgFunc.GetRD()->FindDefForRegOpnd(*useInsn, firstInsnDestRegNO, true); + if (defInsnSet.size() != 1) { + return false; + } + ASSERT((*(defInsnSet.begin()))->GetId() == firstInsn.GetId(), "useInsn has only one define Insn : firstInsn"); + /* check whether the stack mem is changed or not */ + AArch64MemOperand *secondInsnDestMem = static_cast(secondInsnDestOpnd); + int64 memOffset = secondInsnDestMem->GetOffsetImmediate()->GetOffsetValue(); + InsnSet memDefInsnSet = cgFunc.GetRD()->FindDefForMemOpnd(*useInsn, memOffset, true); + if (memDefInsnSet.size() != 1) { + return false; + } + if ((*(memDefInsnSet.begin()))->GetId() != secondInsn->GetId()) { + return false; + } + /* check whether has call between use and def */ + if (!cgFunc.GetRD()->HasCallBetweenDefUse(firstInsn, *useInsn)) { + return false; + } + return true; +} + +void LocalVarSaveInsnPattern::Optimize(Insn &insn) { + /* insert ldr insn before useInsn */ + MOperator ldrOpCode = secondInsnSrcOpnd->GetSize() == k64BitSize ? MOP_xldr : MOP_wldr; + Insn &ldrInsn = cgFunc.GetCG()->BuildInstruction(ldrOpCode, *secondInsnSrcOpnd, *secondInsnDestOpnd); + ldrInsn.SetId(useInsn->GetId() - 1); + useInsn->GetBB()->InsertInsnBefore(*useInsn, ldrInsn); + cgFunc.GetRD()->UpdateInOut(*useInsn->GetBB(), true); + secondInsn->SetOperand(kInsnFirstOpnd, *firstInsnSrcOpnd); + BB *saveInsnBB = insn.GetBB(); + saveInsnBB->RemoveInsn(insn); + cgFunc.GetRD()->UpdateInOut(*saveInsnBB, true); +} + +void LocalVarSaveInsnPattern::Init() { + firstInsnSrcOpnd = nullptr; + firstInsnDestOpnd = nullptr; + secondInsnSrcOpnd = nullptr; + secondInsnDestOpnd = nullptr; + useInsn = nullptr; + secondInsn = nullptr; +} + +void LocalVarSaveInsnPattern::Run() { + FOR_ALL_BB(bb, &cgFunc) { + if (bb->IsCleanup()) { + continue; + } + FOR_BB_INSNS(insn, bb) { + if (!insn->IsCall()) { + continue; + } + Insn *firstInsn = insn->GetNextMachineInsn(); + if (firstInsn == nullptr) { + continue; + } + Init(); + if (!CheckCondition(*firstInsn)) { + continue; + } + Optimize(*firstInsn); + } + } +} +} /* namespace maplebe */ diff --git a/src/maple_be/src/cg/aarch64/aarch64_ico.cpp b/src/maple_be/src/cg/aarch64/aarch64_ico.cpp new file mode 100644 index 0000000000000000000000000000000000000000..85a3c36873d470be5ed123acf2607f16db5a0925 --- /dev/null +++ b/src/maple_be/src/cg/aarch64/aarch64_ico.cpp @@ -0,0 +1,480 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. 
+ * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#include "aarch64_ico.h" +#include "ico.h" +#include "cg.h" +#include "cg_option.h" +#include "aarch64_isa.h" +#include "aarch64_insn.h" +#include "aarch64_cgfunc.h" + +/* + * This phase implements if-conversion optimization, + * which tries to convert conditional branches into cset/csel instructions + */ +#define ICO_DUMP CG_DEBUG_FUNC(cgFunc) +namespace maplebe { +void AArch64IfConversionOptimizer::InitOptimizePatterns() { + singlePassPatterns.push_back(memPool->New(*cgFunc)); +} + +Insn *AArch64ICOPattern::BuildCmpInsn(const Insn &condBr) { + AArch64CGFunc *func = static_cast(cgFunc); + RegOperand ® = static_cast(condBr.GetOperand(0)); + PrimType ptyp = (reg.GetSize() == k64BitSize) ? PTY_u64 : PTY_u32; + ImmOperand &numZero = func->CreateImmOperand(ptyp, 0); + Operand &rflag = func->GetOrCreateRflag(); + MOperator mopCode = (reg.GetSize() == k64BitSize) ? MOP_xcmpri : MOP_wcmpri; + Insn &cmpInsn = func->GetCG()->BuildInstruction(mopCode, rflag, reg, numZero); + return &cmpInsn; +} + +bool AArch64ICOPattern::IsSetInsn(const Insn &insn, Operand *&dest, Operand *&src) const { + MOperator mOpCode = insn.GetMachineOpcode(); + if (mOpCode >= MOP_xmovrr && mOpCode <= MOP_xvmovd) { + dest = &(insn.GetOperand(0)); + src = &(insn.GetOperand(1)); + return true; + } + dest = nullptr; + src = nullptr; + return false; +} + +AArch64CC_t AArch64ICOPattern::Encode(MOperator mOp, bool inverse) const { + switch (mOp) { + case MOP_bmi: + return inverse ? CC_PL : CC_MI; + case MOP_bvc: + return inverse ? CC_VS : CC_VC; + case MOP_bls: + return inverse ? CC_HI : CC_LS; + case MOP_blt: + return inverse ? CC_GE : CC_LT; + case MOP_ble: + return inverse ? CC_GT : CC_LE; + case MOP_beq: + return inverse ? CC_NE : CC_EQ; + case MOP_bne: + return inverse ? CC_EQ : CC_NE; + case MOP_blo: + return inverse ? CC_HS : CC_LO; + case MOP_bpl: + return inverse ? CC_MI : CC_PL; + case MOP_bhs: + return inverse ? CC_LO : CC_HS; + case MOP_bvs: + return inverse ? CC_VC : CC_VS; + case MOP_bhi: + return inverse ? CC_LS : CC_HI; + case MOP_bgt: + return inverse ? CC_LE : CC_GT; + case MOP_bge: + return inverse ? CC_LT : CC_GE; + case MOP_wcbnz: + return inverse ? CC_EQ : CC_NE; + case MOP_xcbnz: + return inverse ? CC_EQ : CC_NE; + case MOP_wcbz: + return inverse ? CC_NE : CC_EQ; + case MOP_xcbz: + return inverse ? CC_NE : CC_EQ; + default: + return kCcLast; + } +} + +Insn *AArch64ICOPattern::BuildCondSet(const Insn &branch, RegOperand ®, bool inverse) { + AArch64CC_t ccCode = Encode(branch.GetMachineOpcode(), inverse); + ASSERT(ccCode != kCcLast, "unknown cond, ccCode can't be kCcLast"); + AArch64CGFunc *func = static_cast(cgFunc); + CondOperand &cond = func->GetCondOperand(ccCode); + MOperator mopCode = (reg.GetSize() == k64BitSize) ? 
MOP_xcsetrc : MOP_wcsetrc; + return &func->GetCG()->BuildInstruction(mopCode, reg, cond); +} + +Insn *AArch64ICOPattern::BuildCondSel(const Insn &branch, MOperator mOp, RegOperand &dst, RegOperand &src1, + RegOperand &src2) { + AArch64CC_t ccCode = Encode(branch.GetMachineOpcode(), false); + ASSERT(ccCode != kCcLast, "unknown cond, ccCode can't be kCcLast"); + CondOperand &cond = static_cast(cgFunc)->GetCondOperand(ccCode); + return &cgFunc->GetCG()->BuildInstruction(mOp, dst, src1, src2, cond); +} + +void AArch64ICOPattern::GenerateInsnForImm(const Insn &branchInsn, Operand &ifDest, Operand &elseDest, + RegOperand &destReg, std::vector &generateInsn) { + ImmOperand &imm1 = static_cast(ifDest); + ImmOperand &imm2 = static_cast(elseDest); + bool inverse = imm1.IsZero() && imm2.IsOne(); + if (inverse || (imm2.IsZero() && imm1.IsOne())) { + Insn *csetInsn = BuildCondSet(branchInsn, destReg, inverse); + ASSERT(csetInsn != nullptr, "build a insn failed"); + generateInsn.push_back(csetInsn); + } else if (imm1.GetValue() == imm2.GetValue()) { + MOperator mOp = (destReg.GetSize() == k64BitSize ? MOP_xmovri64 : MOP_xmovri32); + Insn &tempInsn = + cgFunc->GetTheCFG()->GetInsnModifier()->GetCGFunc()->GetCG()->BuildInstruction(mOp, destReg, + imm1); + generateInsn.push_back(&tempInsn); + } else { + MOperator mOp = (destReg.GetSize() == k64BitSize ? MOP_xmovri64 : MOP_xmovri32); + RegOperand *tempTarIf = cgFunc->GetTheCFG()->CreateVregFromReg(destReg); + Insn &tempInsnIf = + cgFunc->GetTheCFG()->GetInsnModifier()->GetCGFunc()->GetCG()->BuildInstruction(mOp, *tempTarIf, + imm1); + generateInsn.push_back(&tempInsnIf); + + RegOperand *tempTarElse = cgFunc->GetTheCFG()->CreateVregFromReg(destReg); + Insn &tempInsnElse = + cgFunc->GetTheCFG()->GetInsnModifier()->GetCGFunc()->GetCG()->BuildInstruction(mOp, *tempTarElse, + imm2); + generateInsn.push_back(&tempInsnElse); + + uint32 dSize = destReg.GetSize(); + bool isIntTy = destReg.IsOfIntClass(); + MOperator mOpCode = isIntTy ? (dSize == k64BitSize ? MOP_xcselrrrc : MOP_wcselrrrc) + : (dSize == k64BitSize ? MOP_dcselrrrc : (dSize == k32BitSize ? + MOP_scselrrrc : MOP_hcselrrrc)); + Insn *cselInsn = BuildCondSel(branchInsn, mOpCode, destReg, *tempTarIf, *tempTarElse); + CHECK_FATAL(cselInsn != nullptr, "build a csel insn failed"); + generateInsn.push_back(cselInsn); + } +} + +RegOperand *AArch64ICOPattern::GenerateRegAndTempInsn(Operand &dest, const RegOperand &destReg, + std::vector &generateInsn) { + RegOperand *reg = nullptr; + if (!dest.IsRegister()) { + MOperator mOp = (destReg.GetSize() == k64BitSize ? 
MOP_xmovri64 : MOP_xmovri32); + reg = cgFunc->GetTheCFG()->CreateVregFromReg(destReg); + ImmOperand &tempSrcElse = static_cast(dest); + Insn &tempInsn = + cgFunc->GetTheCFG()->GetInsnModifier()->GetCGFunc()->GetCG()->BuildInstruction(mOp, *reg, + tempSrcElse); + generateInsn.push_back(&tempInsn); + return reg; + } else { + return (static_cast(&dest)); + } +} + +void AArch64ICOPattern::GenerateInsnForReg(const Insn &branchInsn, Operand &ifDest, Operand &elseDest, + RegOperand &destReg, std::vector &generateInsn) { + RegOperand *tReg = GenerateRegAndTempInsn(ifDest, destReg, generateInsn); + RegOperand *eReg = GenerateRegAndTempInsn(elseDest, destReg, generateInsn); + + /* mov w0, w1 mov w0, w1 --> mov w0, w1 */ + if (eReg->GetRegisterNumber() == tReg->GetRegisterNumber()) { + uint32 dSize = destReg.GetSize(); + bool srcIsIntTy = tReg->IsOfIntClass(); + bool destIsIntTy = destReg.IsOfIntClass(); + MOperator mOp; + if (dSize == k64BitSize) { + mOp = srcIsIntTy ? (destIsIntTy ? MOP_xmovrr : MOP_xvmovdr) : (destIsIntTy ? MOP_xvmovrd : MOP_xvmovd); + } else { + mOp = srcIsIntTy ? (destIsIntTy ? MOP_wmovrr : MOP_xvmovsr) : (destIsIntTy ? MOP_xvmovrs : MOP_xvmovs); + } + Insn &tempInsnIf = + cgFunc->GetTheCFG()->GetInsnModifier()->GetCGFunc()->GetCG()->BuildInstruction(mOp, destReg, + *tReg); + generateInsn.push_back(&tempInsnIf); + } else { + uint32 dSize = destReg.GetSize(); + bool isIntTy = destReg.IsOfIntClass(); + MOperator mOpCode = isIntTy ? (dSize == k64BitSize ? MOP_xcselrrrc : MOP_wcselrrrc) + : (dSize == k64BitSize ? MOP_dcselrrrc : (dSize == k32BitSize ? + MOP_scselrrrc : MOP_hcselrrrc)); + Insn *cselInsn = BuildCondSel(branchInsn, mOpCode, destReg, *tReg, *eReg); + CHECK_FATAL(cselInsn != nullptr, "build a csel insn failed"); + generateInsn.push_back(cselInsn); + } +} + +Operand *AArch64ICOPattern::GetDestReg(const std::map &destSrcMap, + const RegOperand &destReg) const { + Operand *dest = nullptr; + for (const auto &destSrcPair : destSrcMap) { + ASSERT(destSrcPair.first->IsRegister(), "opnd must be register"); + RegOperand *destRegInMap = static_cast(destSrcPair.first); + ASSERT(destRegInMap != nullptr, "nullptr check"); + if (destRegInMap->GetRegisterNumber() == destReg.GetRegisterNumber()) { + dest = destSrcPair.second; + break; + } + } + return dest; +} + +bool AArch64ICOPattern::BuildCondMovInsn(BB &cmpBB, const BB &bb, const std::map &ifDestSrcMap, + const std::map &elseDestSrcMap, + bool elseBBIsProcessed, std::vector &generateInsn) { + Insn *branchInsn = cgFunc->GetTheCFG()->FindLastCondBrInsn(cmpBB); + FOR_BB_INSNS_CONST(insn, (&bb)) { + if (!insn->IsMachineInstruction() || insn->IsBranch()) { + continue; + } + Operand *dest = nullptr; + Operand *src = nullptr; + + if (!IsSetInsn(*insn, dest, src)) { + ASSERT(false, "insn check"); + } + ASSERT(dest->IsRegister(), "register check"); + RegOperand *destReg = static_cast(dest); + + Operand *elseDest = GetDestReg(elseDestSrcMap, *destReg); + Operand *ifDest = GetDestReg(ifDestSrcMap, *destReg); + + if (elseBBIsProcessed) { + if (elseDest != nullptr) { + continue; + } + elseDest = dest; + ASSERT(ifDest != nullptr, "null ptr check"); + if (!bb.GetLiveOut()->TestBit(destReg->GetRegisterNumber())) { + continue; + } + } else { + ASSERT(elseDest != nullptr, "null ptr check"); + if (ifDest == nullptr) { + if (!bb.GetLiveOut()->TestBit(destReg->GetRegisterNumber())) { + continue; + } + ifDest = dest; + } + } + + /* generate cset or csel instruction */ + ASSERT(ifDest != nullptr, "null ptr check"); + if (ifDest->IsIntImmediate() && 
elseDest->IsIntImmediate()) { + GenerateInsnForImm(*branchInsn, *ifDest, *elseDest, *destReg, generateInsn); + } else { + GenerateInsnForReg(*branchInsn, *ifDest, *elseDest, *destReg, generateInsn); + } + } + + return true; +} + +bool AArch64ICOPattern::CheckModifiedRegister(Insn &insn, std::map &destSrcMap, Operand &src, + Operand &dest) const { +/* src was modified in this blcok earlier */ + if (src.IsRegister()) { + RegOperand &srcReg = static_cast(src); + for (const auto &destSrcPair : destSrcMap) { + ASSERT(destSrcPair.first->IsRegister(), "opnd must be register"); + RegOperand *mapSrcReg = static_cast(destSrcPair.first); + if (mapSrcReg->GetRegisterNumber() == srcReg.GetRegisterNumber()) { + return false; + } + } + } + + /* dest register was modified earlier in this block */ + ASSERT(dest.IsRegister(), "opnd must be register"); + RegOperand &destReg = static_cast(dest); + for (const auto &destSrcPair : destSrcMap) { + ASSERT(destSrcPair.first->IsRegister(), "opnd must be register"); + RegOperand *mapSrcReg = static_cast(destSrcPair.first); + if (mapSrcReg->GetRegisterNumber() == destReg.GetRegisterNumber()) { + return false; + } + } + + /* src register is modified later in this block, will not be processed */ + if (src.IsRegister()) { + RegOperand &srcReg = static_cast(src); + if (destReg.IsOfFloatOrSIMDClass() && srcReg.IsZeroRegister()) { + return false; + } + for (Insn *tmpInsn = &insn; tmpInsn != nullptr; tmpInsn = tmpInsn->GetNext()) { + Operand *tmpDest = nullptr; + Operand *tmpSrc = nullptr; + if (IsSetInsn(*tmpInsn, tmpDest, tmpSrc) && tmpDest->Equals(src)) { + ASSERT(tmpDest->IsRegister(), "opnd must be register"); + RegOperand *tmpDestReg = static_cast(tmpDest); + if (srcReg.GetRegisterNumber() == tmpDestReg->GetRegisterNumber()) { + return false; + } + } + } + } + return true; +} + +bool AArch64ICOPattern::CheckCondMoveBB(BB *bb, std::map &destSrcMap, + std::vector &destRegs, Operand *flagOpnd) const { + if (bb == nullptr) { + return false; + } + FOR_BB_INSNS(insn, bb) { + if (!insn->IsMachineInstruction() || insn->IsBranch()) { + continue; + } + Operand *dest = nullptr; + Operand *src = nullptr; + + if (!IsSetInsn(*insn, dest, src)) { + return false; + } + ASSERT(dest != nullptr, "null ptr check"); + ASSERT(src != nullptr, "null ptr check"); + + if (!dest->IsRegister()) { + return false; + } + + if (!src->IsConstant() && !src->IsRegister()) { + return false; + } + + if (flagOpnd != nullptr) { + RegOperand *flagReg = static_cast(flagOpnd); + regno_t flagRegNO = flagReg->GetRegisterNumber(); + if (bb->GetLiveOut()->TestBit(flagRegNO)) { + return false; + } + } + + if (!CheckModifiedRegister(*insn, destSrcMap, *src, *dest)) { + return false; + } + + (void)destSrcMap.insert(std::make_pair(dest, src)); + destRegs.push_back(dest); + } + return true; +} + +/* Convert conditional branches into cset/csel instructions */ +bool AArch64ICOPattern::DoOpt(BB &cmpBB, BB *ifBB, BB *elseBB, BB &joinBB) { + Insn *condBr = cgFunc->GetTheCFG()->FindLastCondBrInsn(cmpBB); + ASSERT(condBr != nullptr, "nullptr check"); + Insn *cmpInsn = FindLastCmpInsn(cmpBB); + Operand *flagOpnd = nullptr; + /* for cbnz and cbz institution */ + if (cgFunc->GetTheCFG()->IsCompareAndBranchInsn(*condBr)) { + if (condBr->GetOperand(0).IsZeroRegister()) { + return false; + } + cmpInsn = condBr; + flagOpnd = &(condBr->GetOperand(0)); + } + + /* tbz will not be optimized */ + MOperator mOperator = condBr->GetMachineOpcode(); + if (mOperator == MOP_xtbz || mOperator == MOP_wtbz || mOperator == MOP_xtbnz || mOperator 
== MOP_wtbnz) { + return false; + } + if (cmpInsn == nullptr) { + return false; + } + + std::vector ifDestRegs; + std::vector elseDestRegs; + + std::map ifDestSrcMap; + std::map elseDestSrcMap; + + if (!CheckCondMoveBB(elseBB, elseDestSrcMap, elseDestRegs, flagOpnd) || + (ifBB != nullptr && !CheckCondMoveBB(ifBB, ifDestSrcMap, ifDestRegs, flagOpnd))) { + return false; + } + + size_t count = elseDestRegs.size(); + + for (auto *itr : ifDestRegs) { + bool foundInElse = false; + for (auto *elseItr : elseDestRegs) { + RegOperand *elseDestReg = static_cast(elseItr); + RegOperand *ifDestReg = static_cast(itr); + if (ifDestReg->GetRegisterNumber() == elseDestReg->GetRegisterNumber()) { + foundInElse = true; + break; + } + } + if (foundInElse) { + continue; + } else { + ++count; + } + } + if (count > kThreshold) { + return false; + } + + /* generate insns */ + std::vector elseGenerateInsn; + std::vector ifGenerateInsn; + bool elseBBProcessResult = false; + if (elseBB != nullptr) { + elseBBProcessResult = BuildCondMovInsn(cmpBB, *elseBB, ifDestSrcMap, elseDestSrcMap, false, elseGenerateInsn); + } + bool ifBBProcessResult = false; + if (ifBB != nullptr) { + ifBBProcessResult = BuildCondMovInsn(cmpBB, *ifBB, ifDestSrcMap, elseDestSrcMap, true, ifGenerateInsn); + } + if (!elseBBProcessResult || (ifBB != nullptr && !ifBBProcessResult)) { + return false; + } + + /* insert insn */ + if (cgFunc->GetTheCFG()->IsCompareAndBranchInsn(*condBr)) { + Insn *innerCmpInsn = BuildCmpInsn(*condBr); + cmpBB.InsertInsnBefore(*condBr, *innerCmpInsn); + cmpInsn = innerCmpInsn; + } + + if (elseBB != nullptr) { + cmpBB.SetKind(elseBB->GetKind()); + } else { + cmpBB.SetKind(ifBB->GetKind()); + } + /* delete condBr */ + cmpBB.RemoveInsn(*condBr); + /* Insert goto insn after csel insn. */ + if (cmpBB.GetKind() == BB::kBBGoto || cmpBB.GetKind() == BB::kBBIf) { + if (elseBB != nullptr) { + cmpBB.InsertInsnAfter(*cmpBB.GetLastInsn(), *elseBB->GetLastInsn()); + } else { + cmpBB.InsertInsnAfter(*cmpBB.GetLastInsn(), *ifBB->GetLastInsn()); + } + } + + /* Insert instructions in branches after cmpInsn */ + for (auto itr = elseGenerateInsn.rbegin(); itr != elseGenerateInsn.rend(); ++itr) { + cmpBB.InsertInsnAfter(*cmpInsn, **itr); + } + for (auto itr = ifGenerateInsn.rbegin(); itr != ifGenerateInsn.rend(); ++itr) { + cmpBB.InsertInsnAfter(*cmpInsn, **itr); + } + + /* Remove branches and merge join */ + if (ifBB != nullptr) { + cgFunc->GetTheCFG()->RemoveBB(*ifBB); + } + if (elseBB != nullptr) { + cgFunc->GetTheCFG()->RemoveBB(*elseBB); + } + + if (cmpBB.GetKind() != BB::kBBIf && cmpBB.GetNext() == &joinBB && + !maplebe::CGCFG::InLSDA(joinBB.GetLabIdx(), *cgFunc->GetEHFunc()) && + cgFunc->GetTheCFG()->CanMerge(cmpBB, joinBB)) { + maplebe::CGCFG::MergeBB(cmpBB, joinBB, *cgFunc); + keepPosition = true; + } + return true; +} +} /* namespace maplebe */ diff --git a/src/maple_be/src/cg/aarch64/aarch64_optimize_common.cpp b/src/maple_be/src/cg/aarch64/aarch64_optimize_common.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8b480135b9040ace11bc84360bdcdad615971316 --- /dev/null +++ b/src/maple_be/src/cg/aarch64/aarch64_optimize_common.cpp @@ -0,0 +1,202 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. 
+ * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#include "aarch64_optimize_common.h" +#include "aarch64_isa.h" +#include "aarch64_cgfunc.h" +#include "cgbb.h" + +namespace maplebe { +namespace { +constexpr int kTbxTargetIdx = 2; +}; + +MOperator AArch64InsnVisitor::FlipConditionOp(MOperator originalOp, int &targetIdx) { + targetIdx = 1; + switch (originalOp) { + case AArch64MOP_t::MOP_beq: + return AArch64MOP_t::MOP_bne; + case AArch64MOP_t::MOP_bge: + return AArch64MOP_t::MOP_blt; + case AArch64MOP_t::MOP_bgt: + return AArch64MOP_t::MOP_ble; + case AArch64MOP_t::MOP_bhi: + return AArch64MOP_t::MOP_bls; + case AArch64MOP_t::MOP_bhs: + return AArch64MOP_t::MOP_blo; + case AArch64MOP_t::MOP_ble: + return AArch64MOP_t::MOP_bgt; + case AArch64MOP_t::MOP_blo: + return AArch64MOP_t::MOP_bhs; + case AArch64MOP_t::MOP_bls: + return AArch64MOP_t::MOP_bhi; + case AArch64MOP_t::MOP_blt: + return AArch64MOP_t::MOP_bge; + case AArch64MOP_t::MOP_bne: + return AArch64MOP_t::MOP_beq; + case AArch64MOP_t::MOP_xcbnz: + return AArch64MOP_t::MOP_xcbz; + case AArch64MOP_t::MOP_wcbnz: + return AArch64MOP_t::MOP_wcbz; + case AArch64MOP_t::MOP_xcbz: + return AArch64MOP_t::MOP_xcbnz; + case AArch64MOP_t::MOP_wcbz: + return AArch64MOP_t::MOP_wcbnz; + case AArch64MOP_t::MOP_wtbnz: + targetIdx = kTbxTargetIdx; + return AArch64MOP_t::MOP_wtbz; + case AArch64MOP_t::MOP_wtbz: + targetIdx = kTbxTargetIdx; + return AArch64MOP_t::MOP_wtbnz; + case AArch64MOP_t::MOP_xtbnz: + targetIdx = kTbxTargetIdx; + return AArch64MOP_t::MOP_xtbz; + case AArch64MOP_t::MOP_xtbz: + targetIdx = kTbxTargetIdx; + return AArch64MOP_t::MOP_xtbnz; + default: + break; + } + return AArch64MOP_t::MOP_undef; +} + +void AArch64InsnVisitor::ModifyJumpTarget(Operand &targetOperand, BB &bb) { + bb.GetLastInsn()->SetOperand(GetJumpTargetIdx(*(bb.GetLastInsn())), targetOperand); +} + +void AArch64InsnVisitor::ModifyJumpTarget(maple::LabelIdx targetLabel, BB &bb) { + ModifyJumpTarget(static_cast(GetCGFunc())->GetOrCreateLabelOperand(targetLabel), bb); +} + +void AArch64InsnVisitor::ModifyJumpTarget(BB &newTarget, BB &bb) { + ModifyJumpTarget(newTarget.GetLastInsn()->GetOperand(GetJumpTargetIdx(*(newTarget.GetLastInsn()))), bb); +} + +Insn *AArch64InsnVisitor::CloneInsn(Insn &originalInsn) { + MemPool *memPool = const_cast(CG::GetCurCGFunc()->GetMemoryPool()); + if (originalInsn.IsTargetInsn()) { + return memPool->Clone(*static_cast(&originalInsn)); + } else if (originalInsn.IsCfiInsn()) { + return memPool->Clone(*static_cast(&originalInsn)); + } + CHECK_FATAL(false, "Cannot clone"); + return nullptr; +} + +/* + * Precondition: The given insn is a jump instruction. + * Get the jump target label operand index from the given instruction. + * Note: MOP_xbr is a jump instruction, but the target is unknown at compile time, + * because a register instead of label. So we don't take it as a branching instruction. 
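+ * For example: for an unconditional b .L1 the label is operand 0; for a conditional
+ * branch such as b.eq .L1 or for cbz w0, .L1 it is operand 1; and for tbz w0, #3, .L1
+ * it is operand 2 (kTbxTargetIdx). (Illustrative operands; see the switch below.)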
+ */ +int AArch64InsnVisitor::GetJumpTargetIdx(const Insn &insn) const { + MOperator mOp = insn.GetMachineOpcode(); + switch (mOp) { + /* unconditional jump */ + case MOP_xuncond: { + return 0; + } + /* conditional jump */ + case MOP_bmi: + case MOP_bvc: + case MOP_bls: + case MOP_blt: + case MOP_ble: + case MOP_blo: + case MOP_beq: + case MOP_bpl: + case MOP_bhs: + case MOP_bvs: + case MOP_bhi: + case MOP_bgt: + case MOP_bge: + case MOP_bne: + case MOP_wcbz: + case MOP_xcbz: + case MOP_wcbnz: + case MOP_xcbnz: { + return 1; + } + case MOP_wtbz: + case MOP_xtbz: + case MOP_wtbnz: + case MOP_xtbnz: { + return kTbxTargetIdx; + } + default: + CHECK_FATAL(false, "Not a jump insn"); + } + return 0; +} + +/* + * Precondition: The given insn is a jump instruction. + * Get the jump target label from the given instruction. + * Note: MOP_xbr is a branching instruction, but the target is unknown at compile time, + * because a register instead of label. So we don't take it as a branching instruction. + */ +LabelIdx AArch64InsnVisitor::GetJumpLabel(const Insn &insn) const { + int operandIdx = GetJumpTargetIdx(insn); + if (insn.GetOperand(operandIdx).IsLabelOpnd()) { + return static_cast(insn.GetOperand(operandIdx)).GetLabelIndex(); + } + ASSERT(false, "Operand is not label"); + return 0; +} + +bool AArch64InsnVisitor::IsCompareInsn(const Insn &insn) const { + switch (insn.GetMachineOpcode()) { + case MOP_wcmpri: + case MOP_wcmprr: + case MOP_xcmpri: + case MOP_xcmprr: + case MOP_hcmperi: + case MOP_hcmperr: + case MOP_scmperi: + case MOP_scmperr: + case MOP_dcmperi: + case MOP_dcmperr: + case MOP_hcmpqri: + case MOP_hcmpqrr: + case MOP_scmpqri: + case MOP_scmpqrr: + case MOP_dcmpqri: + case MOP_dcmpqrr: + case MOP_wcmnri: + case MOP_wcmnrr: + case MOP_xcmnri: + case MOP_xcmnrr: + return true; + default: + return false; + } +} + +bool AArch64InsnVisitor::IsCompareAndBranchInsn(const Insn &insn) const { + switch (insn.GetMachineOpcode()) { + case MOP_wcbnz: + case MOP_xcbnz: + case MOP_wcbz: + case MOP_xcbz: + return true; + default: + return false; + } +} + +RegOperand *AArch64InsnVisitor::CreateVregFromReg(const RegOperand &pReg) { + return &static_cast(GetCGFunc())->CreateRegisterOperandOfType( + pReg.GetRegisterType(), pReg.GetSize() / k8BitSize); +} +} /* namespace maplebe */ diff --git a/src/maple_be/src/cg/aarch64/aarch64_peep.cpp b/src/maple_be/src/cg/aarch64/aarch64_peep.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d61873753a194079b7014e67867591351f1dd0da --- /dev/null +++ b/src/maple_be/src/cg/aarch64/aarch64_peep.cpp @@ -0,0 +1,2691 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
+ */ +#include "aarch64_peep.h" +#include "cg.h" +#include "mpl_logging.h" +#include "common_utils.h" + +namespace maplebe { +#define JAVALANG (cgFunc.GetMirModule().IsJavaModule()) +namespace { +const std::string kMccLoadRef = "MCC_LoadRefField"; +const std::string kMccLoadRefV = "MCC_LoadVolatileField"; +const std::string kMccLoadRefS = "MCC_LoadRefStatic"; +const std::string kMccLoadRefVS = "MCC_LoadVolatileStaticField"; +const std::string kMccDummy = "MCC_Dummy"; + +const std::string GetReadBarrierName(const Insn &insn) { + constexpr int32 totalBarrierNamesNum = 5; + std::array barrierNames = { + kMccLoadRef, kMccLoadRefV, kMccLoadRefS, kMccLoadRefVS, kMccDummy + }; + if (insn.GetMachineOpcode() == MOP_xbl || + insn.GetMachineOpcode() == MOP_tail_call_opt_xbl) { + auto &op = static_cast(insn.GetOperand(kInsnFirstOpnd)); + const std::string &funcName = op.GetName(); + for (const std::string &singleBarrierName : barrierNames) { + if (funcName == singleBarrierName) { + return singleBarrierName; + } + } + } + return ""; +} + +MOperator GetLoadOperator(uint32 refSize, bool isVolatile) { + if (refSize == k32BitSize) { + return isVolatile ? MOP_wldar : MOP_wldr; + } + return isVolatile ? MOP_xldar : MOP_xldr; +} +} + +void AArch64PeepHole::InitOpts() { + optimizations.resize(kPeepholeOptsNum); + optimizations[kRemoveIdenticalLoadAndStoreOpt] = optOwnMemPool->New(cgFunc); + optimizations[kRemoveMovingtoSameRegOpt] = optOwnMemPool->New(cgFunc);; + optimizations[kCombineContiLoadAndStoreOpt] = optOwnMemPool->New(cgFunc); + optimizations[kEliminateSpecifcSXTOpt] = optOwnMemPool->New(cgFunc); + optimizations[kEliminateSpecifcUXTOpt] = optOwnMemPool->New(cgFunc); + optimizations[kFmovRegOpt] = optOwnMemPool->New(cgFunc); + optimizations[kCbnzToCbzOpt] = optOwnMemPool->New(cgFunc); + optimizations[kContiLDRorSTRToSameMEMOpt] = optOwnMemPool->New(cgFunc); + optimizations[kRemoveIncDecRefOpt] = optOwnMemPool->New(cgFunc); + optimizations[kInlineReadBarriersOpt] = optOwnMemPool->New(cgFunc); + optimizations[kReplaceDivToMultiOpt] = optOwnMemPool->New(cgFunc); + optimizations[kAndCmpBranchesToCsetOpt] = optOwnMemPool->New(cgFunc); + optimizations[kZeroCmpBranchesOpt] = optOwnMemPool->New(cgFunc); +} + +void AArch64PeepHole::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + switch (thisMop) { + case MOP_wmovrr: + case MOP_xmovrr: + case MOP_xvmovs: + case MOP_xvmovd: { + (static_cast(optimizations[kRemoveMovingtoSameRegOpt]))->Run(bb, insn); + break; + } + case MOP_xldr: + case MOP_xstr: + case MOP_wldr: + case MOP_wstr: + case MOP_dldr: + case MOP_dstr: + case MOP_sldr: + case MOP_sstr: { + (static_cast(optimizations[kCombineContiLoadAndStoreOpt]))->Run(bb, insn); + (static_cast(optimizations[kContiLDRorSTRToSameMEMOpt]))->Run(bb, insn); + (static_cast(optimizations[kRemoveIdenticalLoadAndStoreOpt]))->Run(bb, insn); + break; + } + case MOP_xsxtb32: + case MOP_xsxth32: + case MOP_xsxtb64: + case MOP_xsxth64: + case MOP_xsxtw64: { + (static_cast(optimizations[kEliminateSpecifcSXTOpt]))->Run(bb, insn); + break; + } + case MOP_xuxtb32: + case MOP_xuxth32: + case MOP_xuxtw64: { + (static_cast(optimizations[kEliminateSpecifcUXTOpt]))->Run(bb, insn); + break; + } + case MOP_xvmovrv: + case MOP_xvmovrd: { + (static_cast(optimizations[kFmovRegOpt]))->Run(bb, insn); + break; + } + case MOP_wcbnz: + case MOP_xcbnz: { + (static_cast(optimizations[kCbnzToCbzOpt]))->Run(bb, insn); + break; + } + case MOP_xbl: { + (static_cast(optimizations[kRemoveIncDecRefOpt]))->Run(bb, insn); + break; + } + 
case MOP_wsdivrrr: { + (static_cast(optimizations[kReplaceDivToMultiOpt]))->Run(bb, insn); + break; + } + case MOP_wcsetrc: + case MOP_xcsetrc: { + (static_cast(optimizations[kAndCmpBranchesToCsetOpt]))->Run(bb, insn); + break; + } + default: + break; + } + if (GetReadBarrierName(insn) != "") { /* skip if it is not a read barrier call. */ + (static_cast(optimizations[kInlineReadBarriersOpt]))->Run(bb, insn); + } + if (&insn == bb.GetLastInsn()) { + (static_cast(optimizations[kZeroCmpBranchesOpt]))->Run(bb, insn); + } +} + +void AArch64PeepHole0::InitOpts() { + optimizations.resize(kPeepholeOptsNum); + optimizations[kRemoveIdenticalLoadAndStoreOpt] = optOwnMemPool->New(cgFunc); + optimizations[kCmpCsetOpt] = optOwnMemPool->New(cgFunc); + optimizations[kComplexMemOperandOptAdd] = optOwnMemPool->New(cgFunc); + optimizations[kDeleteMovAfterCbzOrCbnzOpt] = optOwnMemPool->New(cgFunc); +} + +void AArch64PeepHole0::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + switch (thisMop) { + case MOP_xstr: + case MOP_wstr: { + (static_cast(optimizations[kRemoveIdenticalLoadAndStoreOpt]))->Run(bb, insn); + break; + } + case MOP_wcmpri: + case MOP_xcmpri: { + (static_cast(optimizations[kCmpCsetOpt]))->Run(bb, insn); + break; + } + case MOP_xaddrrr: { + (static_cast(optimizations[kComplexMemOperandOptAdd]))->Run(bb, insn); + break; + } + case MOP_wcbz: + case MOP_xcbz: + case MOP_wcbnz: + case MOP_xcbnz: { + (static_cast(optimizations[kDeleteMovAfterCbzOrCbnzOpt]))->Run(bb, insn); + break; + } + default: + break; + } +} + +void AArch64PrePeepHole::InitOpts() { + optimizations.resize(kPeepholeOptsNum); + optimizations[kOneHoleBranchesPreOpt] = optOwnMemPool->New(cgFunc); + optimizations[kLoadFloatPointOpt] = optOwnMemPool->New(cgFunc); + optimizations[kReplaceOrrToMovOpt] = optOwnMemPool->New(cgFunc); + optimizations[kReplaceCmpToCmnOpt] = optOwnMemPool->New(cgFunc); + optimizations[kRemoveIncRefOpt] = optOwnMemPool->New(cgFunc); + optimizations[kLongIntCompareWithZOpt] = optOwnMemPool->New(cgFunc); + optimizations[kComplexMemOperandPreOptAdd] = optOwnMemPool->New(cgFunc); + optimizations[kComplexMemOperandOptLSL] = optOwnMemPool->New(cgFunc); + optimizations[kComplexMemOperandOptLabel] = optOwnMemPool->New(cgFunc); + optimizations[kWriteFieldCallOpt] = optOwnMemPool->New(cgFunc); +} + +void AArch64PrePeepHole::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + switch (thisMop) { + case MOP_xmovzri16: { + (static_cast(optimizations[kLoadFloatPointOpt]))->Run(bb, insn); + break; + } + case MOP_wiorri12r: + case MOP_wiorrri12: + case MOP_xiorri13r: + case MOP_xiorrri13: { + (static_cast(optimizations[kReplaceOrrToMovOpt]))->Run(bb, insn); + break; + } + case MOP_xmovri32: + case MOP_xmovri64: { + (static_cast(optimizations[kReplaceCmpToCmnOpt]))->Run(bb, insn); + break; + } + case MOP_xbl: { + (static_cast(optimizations[kRemoveIncRefOpt]))->Run(bb, insn); + if (CGOptions::IsGCOnly() && CGOptions::DoWriteRefFieldOpt()) { + (static_cast(optimizations[kWriteFieldCallOpt]))->Run(bb, insn); + } + break; + } + case MOP_xcmpri: { + (static_cast(optimizations[kLongIntCompareWithZOpt]))->Run(bb, insn); + break; + } + case MOP_xaddrrr: { + (static_cast(optimizations[kComplexMemOperandPreOptAdd]))->Run(bb, insn); + break; + } + case MOP_xaddrrrs: { + (static_cast(optimizations[kComplexMemOperandOptLSL]))->Run(bb, insn); + break; + } + case MOP_xldli: { + (static_cast(optimizations[kComplexMemOperandOptLabel]))->Run(bb, insn); + break; + } + default: + break; + } + if 
(&insn == bb.GetLastInsn()) { + (static_cast(optimizations[kOneHoleBranchesPreOpt]))->Run(bb, insn); + if (CGOptions::IsGCOnly() && CGOptions::DoWriteRefFieldOpt()) { + (static_cast(optimizations[kWriteFieldCallOpt]))->Reset(); + } + } +} + +void AArch64PrePeepHole1::InitOpts() { + optimizations.resize(kPeepholeOptsNum); + optimizations[kRemoveDecRefOpt] = optOwnMemPool->New(cgFunc); + optimizations[kComputationTreeOpt] = optOwnMemPool->New(cgFunc); + optimizations[kOneHoleBranchesOpt] = optOwnMemPool->New(cgFunc); + optimizations[kReplaceIncDecWithIncOpt] = optOwnMemPool->New(cgFunc); + optimizations[kAndCmpBranchesToTbzOpt] = optOwnMemPool->New(cgFunc); +} + +void AArch64PrePeepHole1::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + switch (thisMop) { + case MOP_xbl: { + if (JAVALANG) { + (static_cast(optimizations[kRemoveDecRefOpt]))->Run(bb, insn); + (static_cast(optimizations[kReplaceIncDecWithIncOpt]))->Run(bb, insn); + } + break; + } + case MOP_xaddrri12: { + (static_cast(optimizations[kComputationTreeOpt]))->Run(bb, insn); + break; + } + default: + break; + } + if (&insn == bb.GetLastInsn()) { + switch (thisMop) { + case MOP_wcbz: + case MOP_wcbnz: + case MOP_xcbz: + case MOP_xcbnz: { + (static_cast(optimizations[kOneHoleBranchesOpt]))->Run(bb, insn); + break; + } + case MOP_beq: + case MOP_bne: { + (static_cast(optimizations[kAndCmpBranchesToTbzOpt]))->Run(bb, insn); + break; + } + default: + break; + } + } +} + +void RemoveIdenticalLoadAndStoreAArch64::Run(BB &bb, Insn &insn) { + Insn *nextInsn = insn.GetNext(); + if (nextInsn == nullptr) { + return; + } + MOperator mop1 = insn.GetMachineOpcode(); + MOperator mop2 = nextInsn->GetMachineOpcode(); + if ((mop1 == MOP_wstr && mop2 == MOP_wstr) || (mop1 == MOP_xstr && mop2 == MOP_xstr)) { + if (IsMemOperandsIdentical(insn, *nextInsn)) { + bb.RemoveInsn(insn); + insn = *nextInsn; + } + } else if ((mop1 == MOP_wstr && mop2 == MOP_wldr) || (mop1 == MOP_xstr && mop2 == MOP_xldr)) { + if (IsMemOperandsIdentical(insn, *nextInsn)) { + bb.RemoveInsn(*nextInsn); + } + } +} + +bool RemoveIdenticalLoadAndStoreAArch64::IsMemOperandsIdentical(const Insn &insn1, const Insn &insn2) const { + regno_t regNO1 = static_cast(insn1.GetOperand(kInsnFirstOpnd)).GetRegisterNumber(); + regno_t regNO2 = static_cast(insn2.GetOperand(kInsnFirstOpnd)).GetRegisterNumber(); + if (regNO1 != regNO2) { + return false; + } + /* Match only [base + offset] */ + auto &memOpnd1 = static_cast(insn1.GetOperand(kInsnSecondOpnd)); + if (static_cast(memOpnd1).GetAddrMode() != AArch64MemOperand::kAddrModeBOi) { + return false; + } + auto &memOpnd2 = static_cast(insn2.GetOperand(kInsnSecondOpnd)); + if (static_cast(memOpnd2).GetAddrMode() != AArch64MemOperand::kAddrModeBOi) { + return false; + } + Operand *base1 = memOpnd1.GetBaseRegister(); + Operand *base2 = memOpnd2.GetBaseRegister(); + if (!((base1 != nullptr) && base1->IsRegister()) || !((base2 != nullptr) && base2->IsRegister())) { + return false; + } + + regno_t baseRegNO1 = static_cast(base1)->GetRegisterNumber(); + /* First insn re-write base addr reg1 <- [ reg1 + offset ] */ + if (baseRegNO1 == regNO1) { + return false; + } + + regno_t baseRegNO2 = static_cast(base2)->GetRegisterNumber(); + if (baseRegNO1 != baseRegNO2) { + return false; + } + + if (static_cast(memOpnd1).GetOffsetImmediate()->GetOffsetValue() != + static_cast(memOpnd2).GetOffsetImmediate()->GetOffsetValue()) { + return false; + } + return true; +} + +void RemoveMovingtoSameRegAArch64::Run(BB &bb, Insn &insn) { + 
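+ /* Remove a copy whose source and destination are the same register and size, e.g. mov w1, w1. */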
ASSERT(insn.GetOperand(kInsnFirstOpnd).IsRegister(), "expects registers"); + ASSERT(insn.GetOperand(kInsnSecondOpnd).IsRegister(), "expects registers"); + auto ®1 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + auto ®2 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + + if ((reg1.GetRegisterNumber() == reg2.GetRegisterNumber()) && (reg1.GetSize() == reg2.GetSize())) { + bb.RemoveInsn(insn); + } +} + +/* Combining 2 STRs into 1 stp or 2 LDRs into 1 ldp */ +void CombineContiLoadAndStoreAArch64::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + Insn *nextInsn = insn.GetNext(); + if (nextInsn == nullptr || nextInsn->GetMachineOpcode() != thisMop) { + return; + } + ASSERT(insn.GetOperand(kInsnSecondOpnd).IsMemoryAccessOperand(), "expects mem operands"); + ASSERT(nextInsn->GetOperand(kInsnSecondOpnd).IsMemoryAccessOperand(), "expects mem operands"); + auto &memOpnd1 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + + AArch64MemOperand::AArch64AddressingMode addrMode1 = memOpnd1.GetAddrMode(); + if (addrMode1 != AArch64MemOperand::kAddrModeBOi || (!memOpnd1.IsIntactIndexed())) { + return; + } + + auto *base1 = static_cast(memOpnd1.GetBaseRegister()); + ASSERT(base1 == nullptr || !base1->IsVirtualRegister(), "physical register has not been allocated?"); + AArch64OfstOperand *offset1 = memOpnd1.GetOffsetImmediate(); + + auto &memOpnd2 = static_cast(nextInsn->GetOperand(kInsnSecondOpnd)); + + AArch64MemOperand::AArch64AddressingMode addrMode2 = memOpnd2.GetAddrMode(); + if (addrMode2 != AArch64MemOperand::kAddrModeBOi || (!memOpnd2.IsIntactIndexed())) { + return; + } + + auto *base2 = static_cast(memOpnd2.GetBaseRegister()); + ASSERT(base2 == nullptr || !base2->IsVirtualRegister(), "physical register has not been allocated?"); + AArch64OfstOperand *offset2 = memOpnd2.GetOffsetImmediate(); + + if (base1 == nullptr || base2 == nullptr || offset1 == nullptr || offset2 == nullptr) { + return; + } + + /* + * In ARM Architecture Reference Manual ARMv8, for ARMv8-A architecture profile + * LDP on page K1-6125 delcare that ldp can't use same reg + */ + auto ®1 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + auto ®2 = static_cast(nextInsn->GetOperand(kInsnFirstOpnd)); + if ((thisMop == MOP_xldr || thisMop == MOP_sldr || thisMop == MOP_dldr || thisMop == MOP_wldr) && + reg1.GetRegisterNumber() == reg2.GetRegisterNumber()) { + return; + } + + if (reg1.GetSize() != memOpnd1.GetSize() || reg2.GetSize() != memOpnd2.GetSize()) { + return; + } + + uint32 size = reg1.GetSize() >> kLog2BitsPerByte; + int offsetVal1 = offset1->GetOffsetValue(); + int offsetVal2 = offset2->GetOffsetValue(); + if ((base1->GetRegisterNumber() == RFP || base1->GetRegisterNumber() == RSP) && + base1->GetRegisterNumber() == base2->GetRegisterNumber() && + reg1.GetRegisterType() == reg2.GetRegisterType() && reg1.GetSize() == reg2.GetSize() && + abs(offsetVal1 - offsetVal2) == static_cast(size)) { + /* pair instr for 8/4 byte registers must have multiple of 8/4 for imm */ + if ((static_cast(offsetVal1) % size) != 0) { + return; + } + /* For stp/ldp, the imm should be within -512 and 504. 
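+ * (that is the signed, 8-byte-scaled range of the 64-bit pair instructions; the 32-bit
+ * forms use the kStpLdpImm32 bounds checked just below, i.e. a 4-byte-scaled range).
+ * A sketch of the rewrite, with illustrative registers and offsets:
+ *   str w1, [sp, #8]
+ *   str w2, [sp, #12]    ===>    stp w1, w2, [sp, #8]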
*/ + if (size == kIntregBytelen) { + if (offsetVal1 <= kStpLdpImm64LowerBound || offsetVal1 >= kStpLdpImm64UpperBound) { + return; + } + } + if (size == (kIntregBytelen >> 1)) { + if (offsetVal1 <= kStpLdpImm32LowerBound || offsetVal1 >= kStpLdpImm32UpperBound) { + return; + } + } + + MOperator mopPair = GetMopPair(thisMop); + CG *cg = cgFunc.GetCG(); + if (offsetVal1 < offsetVal2) { + bb.InsertInsnAfter(*nextInsn, cg->BuildInstruction(mopPair, reg1, reg2, memOpnd1)); + } else { + bb.InsertInsnAfter(*nextInsn, cg->BuildInstruction(mopPair, reg2, reg1, memOpnd2)); + } + + /* keep the comment */ + Insn *nn = nextInsn->GetNext(); + std::string newComment = ""; + MapleString comment = insn.GetComment(); + if (comment.c_str() != nullptr && strlen(comment.c_str()) > 0) { + newComment += comment.c_str(); + } + comment = nextInsn->GetComment(); + if (newComment.c_str() != nullptr && strlen(newComment.c_str()) > 0) { + newComment += " "; + } + if (comment.c_str() != nullptr && strlen(comment.c_str()) > 0) { + newComment += comment.c_str(); + } + if (newComment.c_str() != nullptr && strlen(newComment.c_str()) > 0) { + nn->SetComment(newComment); + } + bb.RemoveInsn(insn); + bb.RemoveInsn(*nextInsn); + insn = *nn; + } /* pattern found */ +} + +void EliminateSpecifcSXTAArch64::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + Insn *prevInsn = insn.GetPrev(); + while (prevInsn != nullptr && !prevInsn->GetMachineOpcode()) { + prevInsn = prevInsn->GetPrev(); + } + if (prevInsn == nullptr) { + return; + } + auto ®Opnd0 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + auto ®Opnd1 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + if (&insn != bb.GetFirstInsn() && regOpnd0.GetRegisterNumber() == regOpnd1.GetRegisterNumber() && + prevInsn->IsMachineInstruction()) { + if (prevInsn->GetMachineOpcode() == MOP_xmovri32 || prevInsn->GetMachineOpcode() == MOP_xmovri64) { + auto &dstMovOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstMovOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + Operand &opnd = prevInsn->GetOperand(kInsnSecondOpnd); + if (opnd.IsIntImmediate()) { + auto &immOpnd = static_cast(opnd); + int64 value = immOpnd.GetValue(); + if (thisMop == MOP_xsxtb32) { + /* value should in range between -127 and 127 */ + if (value >= static_cast(0xFFFFFFFFFFFFFF80) && value <= 0x7F && + immOpnd.IsSingleInstructionMovable(regOpnd0.GetSize())) { + bb.RemoveInsn(insn); + } + } else if (thisMop == MOP_xsxth32) { + /* value should in range between -32678 and 32678 */ + if (value >= static_cast(0xFFFFFFFFFFFF8000) && value <= 0x7FFF && + immOpnd.IsSingleInstructionMovable(regOpnd0.GetSize())) { + bb.RemoveInsn(insn); + } + } else { + uint64 flag = 0xFFFFFFFFFFFFFF80; /* initialize the flag with fifty-nine 1s at top */ + if (thisMop == MOP_xsxth64) { + flag = 0xFFFFFFFFFFFF8000; /* specify the flag with forty-nine 1s at top in this case */ + } else if (thisMop == MOP_xsxtw64) { + flag = 0xFFFFFFFF80000000; /* specify the flag with thirty-three 1s at top in this case */ + } + if (!(static_cast(value) & flag) && immOpnd.IsSingleInstructionMovable(regOpnd0.GetSize())) { + auto *aarch64CGFunc = static_cast(&cgFunc); + RegOperand &dstOpnd = aarch64CGFunc->GetOrCreatePhysicalRegisterOperand( + static_cast(dstMovOpnd.GetRegisterNumber()), k64BitSize, dstMovOpnd.GetRegisterType()); + prevInsn->SetOperand(kInsnFirstOpnd, dstOpnd); + prevInsn->SetMOperator(MOP_xmovri64); + bb.RemoveInsn(insn); + } + } + } + } else if (prevInsn->GetMachineOpcode() == 
MOP_wldrsb) { + auto &dstMovOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstMovOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + if (thisMop == MOP_xsxtb32) { + bb.RemoveInsn(insn); + } + } else if (prevInsn->GetMachineOpcode() == MOP_wldrsh) { + auto &dstMovOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstMovOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + if (thisMop == MOP_xsxth32) { + bb.RemoveInsn(insn); + } + } + } +} + +void EliminateSpecifcUXTAArch64::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr) { + return; + } + auto ®Opnd0 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + auto ®Opnd1 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + if (prevInsn->IsCall() && + regOpnd0.GetRegisterNumber() == regOpnd1.GetRegisterNumber() && + (regOpnd1.GetRegisterNumber() == R0 || regOpnd1.GetRegisterNumber() == V0)) { + uint32 retSize = prevInsn->GetRetSize(); + if (retSize > 0 && + ((thisMop == MOP_xuxtb32 && retSize <= k1ByteSize) || + (thisMop == MOP_xuxth32 && retSize <= k2ByteSize) || + (thisMop == MOP_xuxtw64 && retSize <= k4ByteSize))) { + bb.RemoveInsn(insn); + } + return; + } + if (&insn == bb.GetFirstInsn() || regOpnd0.GetRegisterNumber() != regOpnd1.GetRegisterNumber() || + !prevInsn->IsMachineInstruction()) { + return; + } + if (thisMop == MOP_xuxtb32) { + if (prevInsn->GetMachineOpcode() == MOP_xmovri32 || prevInsn->GetMachineOpcode() == MOP_xmovri64) { + auto &dstMovOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstMovOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + Operand &opnd = prevInsn->GetOperand(kInsnSecondOpnd); + if (opnd.IsIntImmediate()) { + auto &immOpnd = static_cast(opnd); + int64 value = immOpnd.GetValue(); + /* check the top 56 bits of value */ + if (!(static_cast(value) & 0xFFFFFFFFFFFFFF00)) { + bb.RemoveInsn(insn); + } + } + } else if (prevInsn->GetMachineOpcode() == MOP_wldrb) { + auto &dstOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + bb.RemoveInsn(insn); + } + } else if (thisMop == MOP_xuxth32) { + if (prevInsn->GetMachineOpcode() == MOP_xmovri32 || prevInsn->GetMachineOpcode() == MOP_xmovri64) { + Operand &opnd = prevInsn->GetOperand(kInsnSecondOpnd); + if (opnd.IsIntImmediate()) { + auto &immOpnd = static_cast(opnd); + int64 value = immOpnd.GetValue(); + if (!(static_cast(value) & 0xFFFFFFFFFFFF0000)) { + bb.RemoveInsn(insn); + } + } + } else if (prevInsn->GetMachineOpcode() == MOP_wldrh) { + auto &dstOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + bb.RemoveInsn(insn); + } + } else { + /* this_mop == MOP_xuxtw64 */ + if (prevInsn->GetMachineOpcode() == MOP_xmovri32 || prevInsn->GetMachineOpcode() == MOP_wldrsb || + prevInsn->GetMachineOpcode() == MOP_wldrb || prevInsn->GetMachineOpcode() == MOP_wldrsh || + prevInsn->GetMachineOpcode() == MOP_wldrh || prevInsn->GetMachineOpcode() == MOP_wldr) { + auto &dstOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + /* 32-bit ldr does zero-extension by default, so this conversion can be skipped */ + bb.RemoveInsn(insn); + } + } +} + +void FmovRegAArch64::Run(BB &bb, Insn &insn) { + MOperator thisMop = 
insn.GetMachineOpcode(); + Insn *nextInsn = insn.GetNext(); + if (&insn == bb.GetFirstInsn()) { + return; + } + Insn *prevInsn = insn.GetPrev(); + MOperator prevMop = prevInsn->GetMachineOpcode(); + MOperator newMop; + uint32 doOpt = 0; + if (prevMop == MOP_xvmovrv && thisMop == MOP_xvmovrv) { + doOpt = k32BitSize; + newMop = MOP_wmovrr; + } else if (prevMop == MOP_xvmovrd && thisMop == MOP_xvmovrd) { + doOpt = k64BitSize; + newMop = MOP_xmovrr; + } + if (doOpt == 0) { + return; + } + auto &curSrcRegOpnd = static_cast(insn.GetOperand(kInsnSecondOpnd)); + auto &prevSrcRegOpnd = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + /* same src freg */ + if (curSrcRegOpnd.GetRegisterNumber() != prevSrcRegOpnd.GetRegisterNumber()) { + return; + } + auto &curDstRegOpnd = static_cast(insn.GetOperand(kInsnFirstOpnd)); + regno_t curDstReg = curDstRegOpnd.GetRegisterNumber(); + CG *cg = cgFunc.GetCG(); + /* optimize case 1 */ + auto &prevDstRegOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + regno_t prevDstReg = prevDstRegOpnd.GetRegisterNumber(); + auto *aarch64CGFunc = static_cast(&cgFunc); + RegOperand &dst = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(curDstReg), doOpt, kRegTyInt); + RegOperand &src = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(prevDstReg), doOpt, kRegTyInt); + Insn &newInsn = cg->BuildInstruction(newMop, dst, src); + bb.InsertInsnBefore(insn, newInsn); + bb.RemoveInsn(insn); + if (nextInsn == nullptr) { + return; + } + RegOperand &newOpnd = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(prevDstReg), doOpt, kRegTyInt); + uint32 opndNum = nextInsn->GetOperandSize(); + for (uint32 opndIdx = 0; opndIdx < opndNum; ++opndIdx) { + Operand &opnd = nextInsn->GetOperand(opndIdx); + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + Operand *base = memOpnd.GetBaseRegister(); + if (base != nullptr) { + if (base->IsRegister()) { + auto *reg = static_cast(base); + if (reg->GetRegisterNumber() == curDstReg) { + memOpnd.SetBaseRegister(newOpnd); + } + } + } + Operand *offset = memOpnd.GetIndexRegister(); + if (offset != nullptr) { + if (offset->IsRegister()) { + auto *reg = static_cast(offset); + if (reg->GetRegisterNumber() == curDstReg) { + memOpnd.SetIndexRegister(newOpnd); + } + } + } + } else if (opnd.IsRegister()) { + /* Check if it is a source operand. */ + const AArch64MD *md = &AArch64CG::kMd[static_cast(nextInsn)->GetMachineOpcode()]; + auto *regProp = static_cast(md->operand[opndIdx]); + if (regProp->IsUse()) { + auto ® = static_cast(opnd); + if (reg.GetRegisterNumber() == curDstReg) { + nextInsn->SetOperand(opndIdx, newOpnd); + } + } + } + } +} + +void CbnzToCbzAArch64::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + /* reg has to be R0, since return value is in R0 */ + auto ®Opnd0 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (regOpnd0.GetRegisterNumber() != R0) { + return; + } + BB *nextBB = bb.GetNext(); + /* Make sure nextBB can only be reached by bb */ + if (nextBB->GetPreds().size() > 1 || nextBB->GetEhPreds().empty()) { + return; + } + BB *targetBB = nullptr; + auto it = bb.GetSuccsBegin(); + if (*it == nextBB) { + ++it; + } + targetBB = *it; + /* Make sure when nextBB is empty, targetBB is fallthru of bb. */ + if (targetBB != nextBB->GetNext()) { + return; + } + /* Is nextBB branch to the return-bb? 
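+ * The overall shape being rewritten is roughly (labels illustrative):
+ *   cbnz w0, .Lcont                  cbz  w0, .Lret
+ *   mov  w0, #0             ===>   .Lcont:
+ *   b    .Lret                        ...
+ * .Lcont:
+ *   ...
+ * i.e. when the fallthrough block only zeroes the return register and jumps to the
+ * return path, the branch is inverted so that block can be deleted.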
*/ + if (nextBB->GetSuccs().size() != 1) { + return; + } + BB *nextBBTarget = *(nextBB->GetSuccsBegin()); + if (nextBBTarget->GetKind() != BB::kBBReturn) { + return; + } + /* Next insn should be a mov R0 = 0 */ + Insn *movInsn = nextBB->GetFirstMachineInsn(); + if (movInsn == nullptr) { + return; + } + MOperator movInsnMop = movInsn->GetMachineOpcode(); + if (movInsnMop != MOP_xmovri32 && movInsnMop != MOP_xmovri64) { + return; + } + auto &movDest = static_cast(movInsn->GetOperand(kInsnFirstOpnd)); + if (movDest.GetRegisterNumber() != R0) { + return; + } + auto &movImm = static_cast(movInsn->GetOperand(kInsnSecondOpnd)); + if (movImm.GetValue() != 0) { + return; + } + Insn *brInsn = movInsn->GetNextMachineInsn(); + if (brInsn == nullptr) { + return; + } + if (brInsn->GetMachineOpcode() != MOP_xuncond) { + return; + } + /* Control flow looks nice, instruction looks nice */ + Operand &brTarget = brInsn->GetOperand(kInsnFirstOpnd); + insn.SetOperand(kInsnSecondOpnd, brTarget); + if (thisMop == MOP_wcbnz) { + insn.SetMOP(MOP_wcbz); + } else { + insn.SetMOP(MOP_xcbz); + } + nextBB->RemoveInsn(*movInsn); + nextBB->RemoveInsn(*brInsn); + /* nextBB is now a fallthru bb, not a goto bb */ + nextBB->SetKind(BB::kBBFallthru); + /* + * fix control flow, we have bb, nextBB, targetBB, nextBB_target + * connect bb -> nextBB_target erase targetBB + */ + it = bb.GetSuccsBegin(); + CHECK_FATAL(it != bb.GetSuccsEnd(), "succs is empty."); + if (*it == targetBB) { + bb.EraseSuccs(it); + bb.PushFrontSuccs(*nextBBTarget); + } else { + ++it; + bb.EraseSuccs(it); + bb.PushBackSuccs(*nextBBTarget); + } + for (auto targetBBIt = targetBB->GetPredsBegin(); targetBBIt != targetBB->GetPredsEnd(); ++targetBBIt) { + if (*targetBBIt == &bb) { + targetBB->ErasePreds(targetBBIt); + break; + } + } + for (auto nextIt = nextBBTarget->GetPredsBegin(); nextIt != nextBBTarget->GetPredsEnd(); ++nextIt) { + if (*nextIt == nextBB) { + nextBBTarget->ErasePreds(nextIt); + break; + } + } + nextBBTarget->PushBackPreds(bb); + + /* nextBB has no target, originally just branch target */ + nextBB->EraseSuccs(nextBB->GetSuccsBegin()); + ASSERT(nextBB->GetSuccs().empty(), "peep: branch target incorrect"); + /* Now make nextBB fallthru to targetBB */ + nextBB->PushFrontSuccs(*targetBB); + targetBB->PushBackPreds(*nextBB); +} + +void ContiLDRorSTRToSameMEMAArch64::Run(BB &bb, Insn &insn) { + Insn *prevInsn = insn.GetPrev(); + while (prevInsn != nullptr && !prevInsn->GetMachineOpcode() && prevInsn != bb.GetFirstInsn()) { + prevInsn = prevInsn->GetPrev(); + } + if (!insn.IsMachineInstruction() || prevInsn == nullptr) { + return; + } + bool loadAfterStore = false; + bool loadAfterLoad = false; + MOperator thisMop = insn.GetMachineOpcode(); + MOperator prevMop = prevInsn->GetMachineOpcode(); + /* + * store regB, RegC, offset + * load regA, RegC, offset + */ + if ((thisMop == MOP_xldr && prevMop == MOP_xstr) || (thisMop == MOP_wldr && prevMop == MOP_wstr) || + (thisMop == MOP_dldr && prevMop == MOP_dstr) || (thisMop == MOP_sldr && prevMop == MOP_sstr)) { + loadAfterStore = true; + } + /* + * load regA, RegC, offset + * load regB, RegC, offset + */ + if ((thisMop == MOP_xldr || thisMop == MOP_wldr || thisMop == MOP_dldr || thisMop == MOP_sldr) && + prevMop == thisMop) { + loadAfterLoad = true; + } + if (!loadAfterStore && !loadAfterLoad) { + return; + } + ASSERT(insn.GetOperand(kInsnSecondOpnd).IsMemoryAccessOperand(), "expects mem operands"); + ASSERT(prevInsn->GetOperand(kInsnSecondOpnd).IsMemoryAccessOperand(), "expects mem operands"); + + auto 
&memOpnd1 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + AArch64MemOperand::AArch64AddressingMode addrMode1 = memOpnd1.GetAddrMode(); + if (addrMode1 != AArch64MemOperand::kAddrModeBOi || (!memOpnd1.IsIntactIndexed())) { + return; + } + + auto *base1 = static_cast(memOpnd1.GetBaseRegister()); + ASSERT(base1 == nullptr || !base1->IsVirtualRegister(), "physical register has not been allocated?"); + AArch64OfstOperand *offset1 = memOpnd1.GetOffsetImmediate(); + + auto &memOpnd2 = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + AArch64MemOperand::AArch64AddressingMode addrMode2 = memOpnd2.GetAddrMode(); + if (addrMode2 != AArch64MemOperand::kAddrModeBOi || (!memOpnd2.IsIntactIndexed())) { + return; + } + + auto *base2 = static_cast(memOpnd2.GetBaseRegister()); + ASSERT(base2 == nullptr || !base2->IsVirtualRegister(), "physical register has not been allocated?"); + AArch64OfstOperand *offset2 = memOpnd2.GetOffsetImmediate(); + + if (base1 == nullptr || base2 == nullptr || offset1 == nullptr || offset2 == nullptr) { + return; + } + + auto ®1 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + auto ®2 = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + int offsetVal1 = offset1->GetOffsetValue(); + int offsetVal2 = offset2->GetOffsetValue(); + if (base1->GetRegisterNumber() != base2->GetRegisterNumber() || + reg1.GetRegisterType() != reg2.GetRegisterType() || reg1.GetSize() != reg2.GetSize() || + offsetVal1 != offsetVal2) { + return; + } + if (loadAfterStore && reg1.GetRegisterNumber() != reg2.GetRegisterNumber()) { + /* replace it with mov */ + MOperator newOp = MOP_wmovrr; + if (reg1.GetRegisterType() == kRegTyInt) { + newOp = (reg1.GetSize() <= k32BitSize) ? MOP_wmovrr : MOP_xmovrr; + } else if (reg1.GetRegisterType() == kRegTyFloat) { + newOp = (reg1.GetSize() <= k32BitSize) ? MOP_xvmovs : MOP_xvmovd; + } + CG *cg = cgFunc.GetCG(); + bb.InsertInsnAfter(*prevInsn, cg->BuildInstruction(newOp, reg1, reg2)); + bb.RemoveInsn(insn); + insn = *(prevInsn->GetNext()); + } else if (reg1.GetRegisterNumber() == reg2.GetRegisterNumber() && + base1->GetRegisterNumber() != reg2.GetRegisterNumber()) { + bb.RemoveInsn(insn); + insn = *prevInsn; + } +} + +void RemoveIncDecRefAArch64::Run(BB &bb, Insn &insn) { + ASSERT(insn.GetMachineOpcode() == MOP_xbl, "expect a xbl MOP at RemoveIncDecRef optimization"); + auto &target = static_cast(insn.GetOperand(kInsnFirstOpnd)); + Insn *insnMov = insn.GetPreviousMachineInsn(); + if (insnMov == nullptr) { + return; + } + MOperator mopMov = insnMov->GetMachineOpcode(); + if (target.GetName() == "MCC_IncDecRef_NaiveRCFast" && mopMov == MOP_xmovrr && + static_cast(insnMov->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() == R1 && + static_cast(insnMov->GetOperand(kInsnSecondOpnd)).GetRegisterNumber() == R0) { + bb.RemoveInsn(*insnMov); + bb.RemoveInsn(insn); + bb.SetKind(BB::kBBFallthru); + } +} + +#ifdef USE_32BIT_REF +constexpr uint32 kRefSize = 32; +#else +constexpr uint32 kRefSize = 64; +#endif + +void InlineReadBarriersAArch64::Run(BB &bb, Insn &insn) { + if (!CGOptions::IsGCOnly()) { /* Inline read barriers only enabled for GCONLY. */ + return; + } + const std::string &barrierName = GetReadBarrierName(insn); + CG *cg = cgFunc.GetCG(); + if (barrierName == kMccDummy) { + /* remove dummy call. */ + bb.RemoveInsn(insn); + } else { + /* replace barrier function call with load instruction. 
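+ * e.g.  bl MCC_LoadRefField   ===>  ldr  x0, [x1]
+ *       bl MCC_LoadRefStatic  ===>  ldr  x0, [x0]
+ * the volatile variants use ldar instead, and with USE_32BIT_REF the 32-bit (w-register)
+ * forms are generated; see GetLoadOperator above.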
*/ + bool isVolatile = (barrierName == kMccLoadRefV || barrierName == kMccLoadRefVS); + bool isStatic = (barrierName == kMccLoadRefS || barrierName == kMccLoadRefVS); + /* refSize is 32 if USE_32BIT_REF defined, otherwise 64. */ + const uint32 refSize = kRefSize; + auto *aarch64CGFunc = static_cast(&cgFunc); + MOperator loadOp = GetLoadOperator(refSize, isVolatile); + RegOperand ®Op = aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(R0, refSize, kRegTyInt); + AArch64reg addrReg = isStatic ? R0 : R1; + MemOperand &addr = aarch64CGFunc->CreateMemOpnd(addrReg, 0, refSize); + Insn &loadInsn = cg->BuildInstruction(loadOp, regOp, addr); + bb.ReplaceInsn(insn, loadInsn); + } + bb.SetKind(BB::kBBFallthru); + bool isTailCall = (insn.GetMachineOpcode() == MOP_tail_call_opt_xbl); + if (isTailCall) { + /* add 'ret' instruction for tail call optimized load barrier. */ + Insn &retInsn = cg->BuildInstruction(MOP_xret); + bb.AppendInsn(retInsn); + bb.SetKind(BB::kBBReturn); + } +} + +void ReplaceDivToMultiAArch64::Run(BB &bb, Insn &insn) { + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr) { + return; + } + Insn *prePrevInsn = prevInsn->GetPreviousMachineInsn(); + auto &sdivOpnd1 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + auto &sdivOpnd2 = static_cast(insn.GetOperand(kInsnThirdOpnd)); + if (sdivOpnd1.GetRegisterNumber() == sdivOpnd2.GetRegisterNumber() || sdivOpnd1.GetRegisterNumber() == R16 || + sdivOpnd2.GetRegisterNumber() == R16 || prePrevInsn == nullptr) { + return; + } + MOperator prevMop = prevInsn->GetMachineOpcode(); + MOperator prePrevMop = prePrevInsn->GetMachineOpcode(); + if (prevMop && (prevMop == MOP_wmovkri16) && prePrevMop && (prePrevMop == MOP_xmovri32)) { + /* Check if dest operand of insn is idential with register of prevInsn and prePrevInsn. 
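+ * (The matched mov/movk pair materialises the divisor 0x186A0, i.e. 100000. The
+ * replacement below multiplies by its fixed-point reciprocal instead: the constants
+ * 0x588f / 0x4f8b form 0x4F8B588F, and together with the later add of the dividend this
+ * acts as 0x1_4F8B_588F == ceil(2^49 / 100000), the usual magic-number division constant,
+ * evaluated by the smull / lsr / add / lsr #17 sequence with a final sign-bit fixup.)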
*/ + if ((&(prevInsn->GetOperand(kInsnFirstOpnd)) != &sdivOpnd2) || + (&(prePrevInsn->GetOperand(kInsnFirstOpnd)) != &sdivOpnd2)) { + return; + } + auto &prevLsl = static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + if (prevLsl.GetShiftAmount() != k16BitSize) { + return; + } + auto &prevImmOpnd = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + auto &prePrevImmOpnd = static_cast(prePrevInsn->GetOperand(kInsnSecondOpnd)); + /* + * expect the immediate value of first mov is 0x086A0 which matches 0x186A0 + * because 0x10000 is ignored in 32 bits register + */ + if ((prevImmOpnd.GetValue() != 1) || (prePrevImmOpnd.GetValue() != 34464)) { + return; + } + auto *aarch64CGFunc = static_cast(&cgFunc); + CG *cg = cgFunc.GetCG(); + /* mov w16, #0x588f */ + RegOperand &tempOpnd = aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(R16), + k64BitSize, kRegTyInt); + /* create a immedate operand with this specific value */ + ImmOperand &multiplierLow = aarch64CGFunc->CreateImmOperand(0x588f, k32BitSize, false); + Insn &multiplierLowInsn = cg->BuildInstruction(MOP_xmovri32, tempOpnd, multiplierLow); + bb.InsertInsnBefore(*prePrevInsn, multiplierLowInsn); + + /* + * movk w16, #0x4f8b, LSL #16 + * create a immedate operand with this specific value + */ + ImmOperand &multiplierHigh = aarch64CGFunc->CreateImmOperand(0x4f8b, k32BitSize, false); + LogicalShiftLeftOperand *multiplierHighLsl = aarch64CGFunc->GetLogicalShiftLeftOperand(k16BitSize, true); + Insn &multiplierHighInsn = + cg->BuildInstruction(MOP_wmovkri16, tempOpnd, multiplierHigh, *multiplierHighLsl); + bb.InsertInsnBefore(*prePrevInsn, multiplierHighInsn); + + /* smull x16, w0, w16 */ + Insn &newSmullInsn = + cg->BuildInstruction(MOP_xsmullrrr, tempOpnd, sdivOpnd1, tempOpnd); + bb.InsertInsnBefore(*prePrevInsn, newSmullInsn); + + /* lsr x16, x16, #32 */ + ImmOperand &dstLsrImmHigh = aarch64CGFunc->CreateImmOperand(k32BitSize, k32BitSize, false); + Insn &dstLsrInsnHigh = + cg->BuildInstruction(MOP_xlsrrri6, tempOpnd, tempOpnd, dstLsrImmHigh); + bb.InsertInsnBefore(*prePrevInsn, dstLsrInsnHigh); + + /* add x16, x16, w0, SXTW */ + Operand &sxtw = aarch64CGFunc->CreateExtendShiftOperand(ExtendShiftOperand::kSXTW, 0, 3); + Insn &addInsn = + cg->BuildInstruction(MOP_xxwaddrrre, tempOpnd, tempOpnd, sdivOpnd1, sxtw); + bb.InsertInsnBefore(*prePrevInsn, addInsn); + + /* lsr x16, x16, #17 */ + ImmOperand &dstLsrImmChange = aarch64CGFunc->CreateImmOperand(17, k32BitSize, false); + Insn &dstLsrInsnChange = + cg->BuildInstruction(MOP_xlsrrri6, tempOpnd, tempOpnd, dstLsrImmChange); + bb.InsertInsnBefore(*prePrevInsn, dstLsrInsnChange); + + /* add x2, x16, x0, LSR #31 */ + auto &sdivOpnd0 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + regno_t sdivOpnd0RegNO = sdivOpnd0.GetRegisterNumber(); + RegOperand &extendSdivOpnd0 = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(sdivOpnd0RegNO), + k64BitSize, kRegTyInt); + + regno_t sdivOpnd1RegNum = sdivOpnd1.GetRegisterNumber(); + RegOperand &extendSdivOpnd1 = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(sdivOpnd1RegNum), + k64BitSize, kRegTyInt); + /* shift bit amount is thirty-one at this insn */ + BitShiftOperand &addLsrOpnd = aarch64CGFunc->CreateBitShiftOperand(BitShiftOperand::kLSR, 31, 6); + Insn &addLsrInsn = cg->BuildInstruction(MOP_xaddrrrs, extendSdivOpnd0, tempOpnd, + extendSdivOpnd1, addLsrOpnd); + bb.InsertInsnBefore(*prePrevInsn, addLsrInsn); + + /* + * remove insns + * Check if x1 is used after sdiv insn, and if it is in live-out. 
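+ * (If the divisor register is still live after the sdiv, only the sdiv itself is removed
+ * below and the mov/movk pair that materialised 100000 is kept; otherwise all three
+ * instructions are deleted.)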
+ */ + if (sdivOpnd2.GetRegisterNumber() != sdivOpnd0.GetRegisterNumber()) { + if (IfOperandIsLiveAfterInsn(sdivOpnd2, insn)) { + /* Only remove div instruction. */ + bb.RemoveInsn(insn); + return; + } + } + + bb.RemoveInsn(*prePrevInsn); + bb.RemoveInsn(*prevInsn); + bb.RemoveInsn(insn); + } +} + +void AndCmpBranchesToCsetAArch64::Run(BB &bb, Insn &insn) { + /* prevInsn must be "cmp" insn */ + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr || + (prevInsn->GetMachineOpcode() != MOP_wcmpri && prevInsn->GetMachineOpcode() != MOP_xcmpri)) { + return; + } + /* prevPrevInsn must be "and" insn */ + Insn *prevPrevInsn = prevInsn->GetPreviousMachineInsn(); + if (prevPrevInsn == nullptr || + (prevPrevInsn->GetMachineOpcode() != MOP_wandrri12 && prevPrevInsn->GetMachineOpcode() != MOP_xandrri13)) { + return; + } + + auto &csetCond = static_cast(insn.GetOperand(kInsnSecondOpnd)); + auto &cmpImm = static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + int64 cmpImmVal = cmpImm.GetValue(); + auto &andImm = static_cast(prevPrevInsn->GetOperand(kInsnThirdOpnd)); + int64 andImmVal = andImm.GetValue(); + if ((csetCond.GetCode() == CC_EQ && cmpImmVal == andImmVal) || + (csetCond.GetCode() == CC_NE && cmpImmVal == 0)) { + /* if flag_reg of "cmp" is live later, we can't remove cmp insn. */ + auto &flagReg = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (FindRegLiveOut(flagReg, *prevInsn->GetBB())) { + return; + } + + auto &csetReg = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (andImmVal == 1) { + if (!RegOperand::IsSameRegNO(csetReg, prevInsn->GetOperand(kInsnSecondOpnd)) || + !RegOperand::IsSameRegNO(csetReg, prevPrevInsn->GetOperand(kInsnFirstOpnd))) { + return; + } + /* save the "and" insn only. */ + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); + } else { + if (!RegOperand::IsSameReg(csetReg, prevInsn->GetOperand(kInsnSecondOpnd)) || + !RegOperand::IsSameReg(csetReg, prevPrevInsn->GetOperand(kInsnFirstOpnd)) || + !RegOperand::IsSameReg(csetReg, prevPrevInsn->GetOperand(kInsnSecondOpnd))) { + return; + } + + /* andImmVal is n power of 2 */ + int n = logValueAtBase2(andImmVal); + if (n < 0) { + return; + } + + /* create ubfx insn */ + MOperator ubfxOp = (csetReg.GetSize() <= k32BitSize) ? 
MOP_wubfxrri5i5 : MOP_xubfxrri6i6; + auto ® = static_cast(csetReg); + CG *cg = cgFunc.GetCG(); + auto *aarch64CGFunc = static_cast(&cgFunc); + ImmOperand &bitPos = aarch64CGFunc->CreateImmOperand(n, k8BitSize, false); + ImmOperand &bitSize = aarch64CGFunc->CreateImmOperand(1, k8BitSize, false); + Insn &ubfxInsn = cg->BuildInstruction(ubfxOp, reg, reg, bitPos, bitSize); + bb.InsertInsnBefore(*prevPrevInsn, ubfxInsn); + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); + bb.RemoveInsn(*prevPrevInsn); + } + } +} + +void ZeroCmpBranchesAArch64::Run(BB &bb, Insn &insn) { + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (!insn.IsBranch() || insn.GetOperandSize() <= kInsnSecondOpnd || prevInsn == nullptr) { + return; + } + if (!insn.GetOperand(kInsnSecondOpnd).IsLabel()) { + return; + } + LabelOperand *label = &static_cast(insn.GetOperand(kInsnSecondOpnd)); + RegOperand *regOpnd = nullptr; + RegOperand *reg0 = nullptr; + RegOperand *reg1 = nullptr; + MOperator newOp = MOP_undef; + ImmOperand *imm = nullptr; + switch (prevInsn->GetMachineOpcode()) { + case MOP_wcmpri: + case MOP_xcmpri: { + regOpnd = &static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + imm = &static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + if (imm->GetValue() != 0) { + return; + } + if (insn.GetMachineOpcode() == MOP_bge) { + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbz : MOP_xtbz; + } else if (insn.GetMachineOpcode() == MOP_blt) { + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbnz : MOP_xtbnz; + } else { + return; + } + break; + } + case MOP_wcmprr: + case MOP_xcmprr: { + reg0 = &static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + reg1 = &static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + if (!reg0->IsZeroRegister() && !reg1->IsZeroRegister()) { + return; + } + switch (insn.GetMachineOpcode()) { + case MOP_bge: + if (reg1->IsZeroRegister()) { + regOpnd = &static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbz : MOP_xtbz; + } else { + return; + } + break; + case MOP_ble: + if (reg0->IsZeroRegister()) { + regOpnd = &static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbz : MOP_xtbz; + } else { + return; + } + break; + case MOP_blt: + if (reg1->IsZeroRegister()) { + regOpnd = &static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbnz : MOP_xtbnz; + } else { + return; + } + break; + case MOP_bgt: + if (reg0->IsZeroRegister()) { + regOpnd = &static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbnz : MOP_xtbnz; + } else { + return; + } + break; + default: + return; + } + break; + } + default: + return; + } + CG *cg = cgFunc.GetCG(); + auto aarch64CGFunc = static_cast(&cgFunc); + ImmOperand &bitp = aarch64CGFunc->CreateImmOperand( + (regOpnd->GetSize() <= k32BitSize) ? (k32BitSize - 1) : (k64BitSize - 1), k8BitSize, false); + bb.InsertInsnAfter( + insn, cg->BuildInstruction(newOp, *static_cast(regOpnd), bitp, *label)); + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); +} + +/* + * if there is define point of checkInsn->GetOperand(opndIdx) between startInsn and firstInsn + * return define insn. 
else return nullptr + */ +const Insn *CmpCsetAArch64::DefInsnOfOperandInBB(const Insn &startInsn, const Insn &checkInsn, int opndIdx) { + Insn *prevInsn = nullptr; + for (const Insn *insn = &startInsn; insn != nullptr; insn = prevInsn) { + prevInsn = insn->GetPreviousMachineInsn(); + if (!insn->IsMachineInstruction()) { + continue; + } + /* checkInsn.GetOperand(opndIdx) is thought modified conservatively */ + if (insn->IsCall()) { + return insn; + } + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + if (!regProp->IsDef()) { + continue; + } + /* Operand is base reg of Memory, defined by str */ + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + ASSERT(base != nullptr, "nullptr check"); + ASSERT(base->IsRegister(), "expects RegOperand"); + if (RegOperand::IsSameRegNO(*base, checkInsn.GetOperand(opndIdx)) && + memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi && + (memOpnd.IsPostIndexed() || memOpnd.IsPreIndexed())) { + return insn; + } + } else { + ASSERT(opnd.IsRegister(), "expects RegOperand"); + if (RegOperand::IsSameRegNO(checkInsn.GetOperand(opndIdx), opnd)) { + return insn; + } + } + } + } + return nullptr; +} + +bool CmpCsetAArch64::OpndDefByOneValidBit(const Insn &defInsn) { + MOperator defMop = defInsn.GetMachineOpcode(); + switch (defMop) { + case MOP_wcsetrc: + case MOP_xcsetrc: + return true; + case MOP_xmovri32: + case MOP_xmovri64: { + Operand &defOpnd = defInsn.GetOperand(kInsnSecondOpnd); + ASSERT(defOpnd.IsIntImmediate(), "expects ImmOperand"); + auto &defConst = static_cast(defOpnd); + int64 defConstValue = defConst.GetValue(); + return (defConstValue == 0 || defConstValue == 1); + } + case MOP_xmovrr: + case MOP_wmovrr: + return defInsn.GetOperand(kInsnSecondOpnd).IsZeroRegister(); + case MOP_wlsrrri5: + case MOP_xlsrrri6: { + Operand &opnd2 = defInsn.GetOperand(kInsnThirdOpnd); + ASSERT(opnd2.IsIntImmediate(), "expects ImmOperand"); + auto &opndImm = static_cast(opnd2); + int64 shiftBits = opndImm.GetValue(); + return ((defMop == MOP_wlsrrri5 && shiftBits == (k32BitSize - 1)) || + (defMop == MOP_xlsrrri6 && shiftBits == (k64BitSize - 1))); + } + default: + return false; + } +} + +/* + * help function for cmpcset optimize + * if all define points of used opnd in insn has only one valid bit,return true. 
+ * for cmp reg,#0(#1), that is checking for reg + */ +bool CmpCsetAArch64::CheckOpndDefPoints(Insn &checkInsn, int opndIdx) { + /* check current BB */ + const Insn *defInsn = DefInsnOfOperandInBB(checkInsn, checkInsn, opndIdx); + if (defInsn != nullptr) { + return OpndDefByOneValidBit(*defInsn); + } + /* check pred */ + for (auto predBB : checkInsn.GetBB()->GetPreds()) { + const Insn *tempInsn = nullptr; + if (predBB->GetLastInsn() != nullptr) { + tempInsn = DefInsnOfOperandInBB(*predBB->GetLastInsn(), checkInsn, opndIdx); + } + if (tempInsn == nullptr || !OpndDefByOneValidBit(*tempInsn)) { + return false; + } + } + return true; +} + +/* Check there is use point of rflag start from startInsn to current bb bottom */ +bool CmpCsetAArch64::FlagUsedLaterInCurBB(const BB &bb, Insn &startInsn) const { + if (&bb != startInsn.GetBB()) { + return false; + } + Insn *nextInsn = nullptr; + for (Insn *insn = &startInsn; insn != nullptr; insn = nextInsn) { + nextInsn = insn->GetNextMachineInsn(); + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + /* + * For condition operand, such as NE, EQ and so on, the register number should be + * same with RFLAG, we only need check the property of use/def. + */ + if (!opnd.IsConditionCode()) { + continue; + } + AArch64OpndProp *regProp = static_cast(md->operand[i]); + bool isUse = regProp->IsUse(); + if (isUse) { + return true; + } else { + ASSERT(regProp->IsDef(), "register should be redefined."); + return false; + } + } + } + return false; +} + +void CmpCsetAArch64::Run(BB &bb, Insn &insn) { + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + MOperator firstMop = insn.GetMachineOpcode(); + MOperator secondMop = nextInsn->GetMachineOpcode(); + if ((firstMop == MOP_wcmpri || firstMop == MOP_xcmpri) && + (secondMop == MOP_wcsetrc || secondMop == MOP_xcsetrc)) { + Operand &cmpFirstOpnd = insn.GetOperand(kInsnSecondOpnd); + /* get ImmOperand, must be 0 or 1 */ + Operand &cmpSecondOpnd = insn.GetOperand(kInsnThirdOpnd); + auto &cmpFlagReg = static_cast(insn.GetOperand(kInsnFirstOpnd)); + ASSERT(cmpSecondOpnd.IsIntImmediate(), "expects ImmOperand"); + auto &cmpConst = static_cast(cmpSecondOpnd); + int64 cmpConstVal = cmpConst.GetValue(); + Operand &csetFirstOpnd = nextInsn->GetOperand(kInsnFirstOpnd); + if ((cmpConstVal != 0 && cmpConstVal != 1) || !CheckOpndDefPoints(insn, 1) || + (nextInsn->GetNextMachineInsn() != nullptr && + FlagUsedLaterInCurBB(bb, *nextInsn->GetNextMachineInsn())) || + FindRegLiveOut(cmpFlagReg, *insn.GetBB())) { + return; + } + + Insn *csetInsn = nextInsn; + nextInsn = nextInsn->GetNextMachineInsn(); + auto &cond = static_cast(csetInsn->GetOperand(kInsnSecondOpnd)); + if ((cmpConstVal == 0 && cond.GetCode() == CC_NE) || (cmpConstVal == 1 && cond.GetCode() == CC_EQ)) { + if (RegOperand::IsSameRegNO(cmpFirstOpnd, csetFirstOpnd)) { + bb.RemoveInsn(insn); + bb.RemoveInsn(*csetInsn); + } else { + if (cmpFirstOpnd.GetSize() != csetFirstOpnd.GetSize()) { + return; + } + MOperator mopCode = (cmpFirstOpnd.GetSize() == k64BitSize) ? 
MOP_xmovrr : MOP_wmovrr; + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(mopCode, csetFirstOpnd, cmpFirstOpnd); + bb.ReplaceInsn(insn, newInsn); + bb.RemoveInsn(*csetInsn); + } + } else if ((cmpConstVal == 1 && cond.GetCode() == CC_NE) || (cmpConstVal == 0 && cond.GetCode() == CC_EQ)) { + MOperator mopCode = (cmpFirstOpnd.GetSize() == k64BitSize) ? MOP_xeorrri13 : MOP_weorrri12; + ImmOperand &one = static_cast(&cgFunc)->CreateImmOperand(1, k8BitSize, false); + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(mopCode, csetFirstOpnd, cmpFirstOpnd, one); + bb.ReplaceInsn(insn, newInsn); + bb.RemoveInsn(*csetInsn); + } + } +} + +/* + * help function for DeleteMovAfterCbzOrCbnz + * input: + * bb: the bb to be checked out + * checkCbz: to check out BB end with cbz or cbnz, if cbz, input true + * opnd: for MOV reg, #0, opnd indicate reg + * return: + * according to cbz, return true if insn is cbz or cbnz and the first operand of cbz(cbnz) is same as input + * operand + */ +bool DeleteMovAfterCbzOrCbnzAArch64::PredBBCheck(BB &bb, bool checkCbz, const Operand &opnd) const { + if (bb.GetKind() != BB::kBBIf) { + return false; + } + + Insn *condBr = cgcfg->FindLastCondBrInsn(bb); + ASSERT(condBr != nullptr, "condBr must be found"); + if (!cgcfg->IsCompareAndBranchInsn(*condBr)) { + return false; + } + MOperator mOp = condBr->GetMachineOpcode(); + if (checkCbz && mOp != MOP_wcbz && mOp != MOP_xcbz) { + return false; + } + if (!checkCbz && mOp != MOP_xcbnz && mOp != MOP_wcbnz) { + return false; + } + return RegOperand::IsSameRegNO(condBr->GetOperand(kInsnFirstOpnd), opnd); +} + +bool DeleteMovAfterCbzOrCbnzAArch64::OpndDefByMovZero(const Insn &insn) const { + MOperator defMop = insn.GetMachineOpcode(); + switch (defMop) { + case MOP_xmovri32: + case MOP_xmovri64: { + Operand &defOpnd = insn.GetOperand(kInsnSecondOpnd); + ASSERT(defOpnd.IsIntImmediate(), "expects ImmOperand"); + auto &defConst = static_cast(defOpnd); + int64 defConstValue = defConst.GetValue(); + if (defConstValue == 0) { + return true; + } + return false; + } + case MOP_xmovrr: + case MOP_wmovrr: { + Operand &secondOpnd = insn.GetOperand(kInsnSecondOpnd); + ASSERT(secondOpnd.IsRegister(), "expects RegOperand here"); + auto ®Opnd = static_cast(secondOpnd); + return regOpnd.IsZeroRegister(); + } + default: + return false; + } +} + +/* check whether predefine insn of first operand of test_insn is exist in current BB */ +bool DeleteMovAfterCbzOrCbnzAArch64::NoPreDefine(Insn &testInsn) const { + Insn *nextInsn = nullptr; + for (Insn *insn = testInsn.GetBB()->GetFirstInsn(); insn != nullptr && insn != &testInsn; insn = nextInsn) { + nextInsn = insn->GetNextMachineInsn(); + if (!insn->IsMachineInstruction()) { + continue; + } + ASSERT(!insn->IsCall(), "CG internal error, call insn should not be at the middle of the BB."); + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + if (!regProp->IsDef()) { + continue; + } + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + ASSERT(base != nullptr, "nullptr check"); + ASSERT(base->IsRegister(), "expects RegOperand"); + if (RegOperand::IsSameRegNO(*base, testInsn.GetOperand(kInsnFirstOpnd)) && + memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi && + (memOpnd.IsPostIndexed() || memOpnd.IsPreIndexed())) { + return 
false; + } + } else { + ASSERT(opnd.IsRegister(), "expects RegOperand"); + if (RegOperand::IsSameRegNO(testInsn.GetOperand(kInsnFirstOpnd), opnd)) { + return false; + } + } + } + } + return true; +} +void DeleteMovAfterCbzOrCbnzAArch64::ProcessBBHandle(BB *processBB, const BB &bb, const Insn &insn) { + FOR_BB_INSNS_SAFE(processInsn, processBB, nextProcessInsn) { + nextProcessInsn = processInsn->GetNextMachineInsn(); + if (!processInsn->IsMachineInstruction()) { + continue; + } + /* register may be a caller save register */ + if (processInsn->IsCall()) { + break; + } + if (!OpndDefByMovZero(*processInsn) || !NoPreDefine(*processInsn) || + !RegOperand::IsSameRegNO(processInsn->GetOperand(kInsnFirstOpnd), insn.GetOperand(kInsnFirstOpnd))) { + continue; + } + bool toDoOpt = true; + MOperator condBrMop = insn.GetMachineOpcode(); + /* process elseBB, other preds must be cbz */ + if (condBrMop == MOP_wcbnz || condBrMop == MOP_xcbnz) { + /* check out all preds of process_bb */ + for (auto *processBBPred : processBB->GetPreds()) { + if (processBBPred == &bb) { + continue; + } + if (!PredBBCheck(*processBBPred, true, processInsn->GetOperand(kInsnFirstOpnd))) { + toDoOpt = false; + break; + } + } + } else { + /* process ifBB, other preds can be cbz or cbnz(one at most) */ + for (auto processBBPred : processBB->GetPreds()) { + if (processBBPred == &bb) { + continue; + } + /* for cbnz pred, there is one at most */ + if (!PredBBCheck(*processBBPred, processBBPred != processBB->GetPrev(), + processInsn->GetOperand(kInsnFirstOpnd))) { + toDoOpt = false; + break; + } + } + } + if (!toDoOpt) { + continue; + } + processBB->RemoveInsn(*processInsn); + } +} + +/* ldr wn, [x1, wn, SXTW] + * add x2, wn, x2 + */ +bool ComplexMemOperandAddAArch64::IsExpandBaseOpnd(const Insn &insn, Insn &prevInsn) { + MOperator prevMop = prevInsn.GetMachineOpcode(); + if (prevMop >= MOP_wldrsb && prevMop <= MOP_xldr && + prevInsn.GetOperand(kInsnFirstOpnd).Equals(insn.GetOperand(kInsnSecondOpnd))) { + return true; + } + return false; +} + +void ComplexMemOperandAddAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + Insn *prevInsn = insn.GetPreviousMachineInsn(); + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_xaddrrr && thisMop != MOP_waddrrr) { + return; + } + MOperator nextMop = nextInsn->GetMachineOpcode(); + if (nextMop && + ((nextMop >= MOP_wldrsb && nextMop <= MOP_dldr) || (nextMop >= MOP_wstrb && nextMop <= MOP_dstr))) { + if (!IsMemOperandOptPattern(insn, *nextInsn)) { + return; + } + AArch64MemOperand *memOpnd = static_cast(nextInsn->GetMemOpnd()); + auto newBaseOpnd = static_cast(&insn.GetOperand(kInsnSecondOpnd)); + auto newIndexOpnd = static_cast(&insn.GetOperand(kInsnThirdOpnd)); + regno_t memBaseOpndRegNO = newBaseOpnd->GetRegisterNumber(); + if (newBaseOpnd->GetSize() <= k32BitSize && prevInsn != nullptr && IsExpandBaseOpnd(insn, *prevInsn)) { + newBaseOpnd = &aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(memBaseOpndRegNO), + k64BitSize, kRegTyInt); + } + if (newBaseOpnd->GetSize() != k64BitSize) { + return; + } + if (newIndexOpnd->GetSize() <= k32BitSize) { + AArch64MemOperand &newMemOpnd = + aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, memOpnd->GetSize(), newBaseOpnd, + newIndexOpnd, 0, false); + nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + } else { + AArch64MemOperand &newMemOpnd = + 
aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, memOpnd->GetSize(), newBaseOpnd, + newIndexOpnd, nullptr, nullptr); + nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + } + bb.RemoveInsn(insn); + } +} + +void DeleteMovAfterCbzOrCbnzAArch64::Run(BB &bb, Insn &insn) { + if (bb.GetKind() != BB::kBBIf) { + return; + } + if (&insn != cgcfg->FindLastCondBrInsn(bb)) { + return; + } + if (!cgcfg->IsCompareAndBranchInsn(insn)) { + return; + } + BB *processBB = nullptr; + if (bb.GetNext() == maplebe::CGCFG::GetTargetSuc(bb)) { + return; + } + + MOperator condBrMop = insn.GetMachineOpcode(); + if (condBrMop == MOP_wcbnz || condBrMop == MOP_xcbnz) { + processBB = bb.GetNext(); + } else { + processBB = maplebe::CGCFG::GetTargetSuc(bb); + } + + ASSERT(processBB != nullptr, "process_bb is null in DeleteMovAfterCbzOrCbnzAArch64::Run"); + ProcessBBHandle(processBB, bb, insn); +} + +MOperator OneHoleBranchesPreAArch64::FindNewMop(const BB &bb, const Insn &insn) const { + MOperator newOp = MOP_undef; + if (&insn != bb.GetLastInsn()) { + return newOp; + } + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_wcbz && thisMop != MOP_wcbnz && thisMop != MOP_xcbz && thisMop != MOP_xcbnz) { + return newOp; + } + switch (thisMop) { + case MOP_wcbz: + newOp = MOP_wtbnz; + break; + case MOP_wcbnz: + newOp = MOP_wtbz; + break; + case MOP_xcbz: + newOp = MOP_xtbnz; + break; + case MOP_xcbnz: + newOp = MOP_xtbz; + break; + default: + CHECK_FATAL(false, "can not touch here"); + break; + } + return newOp; +} + +void OneHoleBranchesPreAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + MOperator newOp = FindNewMop(bb, insn); + if (newOp == MOP_undef) { + return; + } + Insn *prevInsn = insn.GetPreviousMachineInsn(); + LabelOperand &label = static_cast(insn.GetOperand(kInsnSecondOpnd)); + if (prevInsn != nullptr && prevInsn->GetMachineOpcode() == MOP_xuxtb32 && + (static_cast(prevInsn->GetOperand(kInsnSecondOpnd)).GetValidBitsNum() <= k8BitSize || + static_cast(prevInsn->GetOperand(kInsnFirstOpnd)).GetValidBitsNum() <= k8BitSize)) { + if (&(prevInsn->GetOperand(kInsnFirstOpnd)) != &(insn.GetOperand(kInsnFirstOpnd))) { + return; + } + insn.SetOperand(kInsnFirstOpnd, prevInsn->GetOperand(kInsnSecondOpnd)); + bb.RemoveInsn(*prevInsn); + } + if (prevInsn != nullptr && + (prevInsn->GetMachineOpcode() == MOP_xeorrri13 || prevInsn->GetMachineOpcode() == MOP_weorrri12) && + static_cast(prevInsn->GetOperand(kInsnThirdOpnd)).GetValue() == 1) { + if (&(prevInsn->GetOperand(kInsnFirstOpnd)) != &(insn.GetOperand(kInsnFirstOpnd))) { + return; + } + Insn *prevPrevInsn = prevInsn->GetPreviousMachineInsn(); + if (prevPrevInsn == nullptr) { + return; + } + if (prevPrevInsn->GetMachineOpcode() != MOP_xuxtb32 || + static_cast(prevPrevInsn->GetOperand(kInsnSecondOpnd)).GetValidBitsNum() != 1) { + return; + } + if (&(prevPrevInsn->GetOperand(kInsnFirstOpnd)) != &(prevInsn->GetOperand(kInsnSecondOpnd))) { + return; + } + ImmOperand &oneHoleOpnd = aarch64CGFunc->CreateImmOperand(0, k8BitSize, false); + auto ®Operand = static_cast(prevPrevInsn->GetOperand(kInsnSecondOpnd)); + bb.InsertInsnAfter(insn, cgFunc.GetCG()->BuildInstruction(newOp, regOperand, oneHoleOpnd, label)); + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); + bb.RemoveInsn(*prevPrevInsn); + } +} + +bool LoadFloatPointAArch64::FindLoadFloatPoint(std::vector &optInsn, Insn &insn) { + MOperator mOp = insn.GetMachineOpcode(); + optInsn.clear(); + if (mOp != MOP_xmovzri16) { + return false; + } + 
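  /*
   * Editor's note, illustrative only (not part of the original patch): the sequence collected by
   * FindLoadFloatPoint is a 64-bit floating-point constant materialized piecewise, for example
   *   movz x0, #0x999a            // bits 0..15
   *   movk x0, #0x9999, LSL #16   // bits 16..31
   *   movk x0, #0x9999, LSL #32   // bits 32..47
   *   movk x0, #0x3fc9, LSL #48   // bits 48..63 -> 0x3fc999999999999a, i.e. the double 0.2
   * LoadFloatPointAArch64::Run below folds the four moves into a single literal-pool load.
   * The register x0 and the constant 0.2 are chosen purely for illustration.
   */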
optInsn.push_back(&insn); + + Insn *insnMov2 = insn.GetNextMachineInsn(); + if (insnMov2 == nullptr) { + return false; + } + if (insnMov2->GetMachineOpcode() != MOP_xmovkri16) { + return false; + } + optInsn.push_back(insnMov2); + + Insn *insnMov3 = insnMov2->GetNextMachineInsn(); + if (insnMov3 == nullptr) { + return false; + } + if (insnMov3->GetMachineOpcode() != MOP_xmovkri16) { + return false; + } + optInsn.push_back(insnMov3); + + Insn *insnMov4 = insnMov3->GetNextMachineInsn(); + if (insnMov4 == nullptr) { + return false; + } + if (insnMov4->GetMachineOpcode() != MOP_xmovkri16) { + return false; + } + optInsn.push_back(insnMov4); + return true; +} + +bool LoadFloatPointAArch64::IsPatternMatch(const std::vector &optInsn) { + int insnNum = 0; + Insn *insn1 = optInsn[insnNum]; + Insn *insn2 = optInsn[++insnNum]; + Insn *insn3 = optInsn[++insnNum]; + Insn *insn4 = optInsn[++insnNum]; + if ((static_cast(insn1->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() != + static_cast(insn2->GetOperand(kInsnFirstOpnd)).GetRegisterNumber()) || + (static_cast(insn2->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() != + static_cast(insn3->GetOperand(kInsnFirstOpnd)).GetRegisterNumber()) || + (static_cast(insn3->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() != + static_cast(insn4->GetOperand(kInsnFirstOpnd)).GetRegisterNumber())) { + return false; + } + if ((static_cast(insn1->GetOperand(kInsnThirdOpnd)).GetShiftAmount() != 0) || + (static_cast(insn2->GetOperand(kInsnThirdOpnd)).GetShiftAmount() != + k16BitSize) || + (static_cast(insn3->GetOperand(kInsnThirdOpnd)).GetShiftAmount() != + k32BitSize) || + (static_cast(insn4->GetOperand(kInsnThirdOpnd)).GetShiftAmount() != + (k16BitSize + k32BitSize))) { + return false; + } + return true; +} + +void LoadFloatPointAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + /* logical shift left values in three optimized pattern */ + std::vector optInsn; + if (FindLoadFloatPoint(optInsn, insn) && IsPatternMatch(optInsn)) { + int insnNum = 0; + Insn *insn1 = optInsn[insnNum]; + Insn *insn2 = optInsn[++insnNum]; + Insn *insn3 = optInsn[++insnNum]; + Insn *insn4 = optInsn[++insnNum]; + auto &movConst1 = static_cast(insn1->GetOperand(kInsnSecondOpnd)); + auto &movConst2 = static_cast(insn2->GetOperand(kInsnSecondOpnd)); + auto &movConst3 = static_cast(insn3->GetOperand(kInsnSecondOpnd)); + auto &movConst4 = static_cast(insn4->GetOperand(kInsnSecondOpnd)); + /* movk/movz's immOpnd is 16-bit unsigned immediate */ + uint64 value = static_cast(movConst1.GetValue()) + + (static_cast(movConst2.GetValue()) << k16BitSize) + + (static_cast(movConst3.GetValue()) << k32BitSize) + + (static_cast(movConst4.GetValue()) << (k16BitSize + k32BitSize)); + + LabelIdx lableIdx = cgFunc.CreateLabel(); + LabelOperand &target = aarch64CGFunc->GetOrCreateLabelOperand(lableIdx); + cgFunc.InsertLabelMap(lableIdx, value); + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(MOP_xldli, insn4->GetOperand(kInsnFirstOpnd), + target); + bb.InsertInsnAfter(*insn4, newInsn); + bb.RemoveInsn(*insn1); + bb.RemoveInsn(*insn2); + bb.RemoveInsn(*insn3); + bb.RemoveInsn(*insn4); + } +} + +void ReplaceOrrToMovAArch64::Run(BB &bb, Insn &insn){ + Operand *opndOfOrr = nullptr; + ImmOperand *immOpnd = nullptr; + AArch64RegOperand *reg1 = nullptr; + AArch64RegOperand *reg2 = nullptr; + MOperator thisMop = insn.GetMachineOpcode(); + MOperator newMop = MOP_undef; + switch (thisMop) { + case MOP_wiorri12r: { /* opnd1 is Reg32 and opnd2 is immediate. 
*/ + opndOfOrr = &(insn.GetOperand(kInsnSecondOpnd)); + reg2 = &static_cast(insn.GetOperand(kInsnThirdOpnd)); + newMop = MOP_wmovrr; + break; + } + case MOP_wiorrri12: { /* opnd1 is reg32 and opnd3 is immediate. */ + opndOfOrr = &(insn.GetOperand(kInsnThirdOpnd)); + reg2 = &static_cast(insn.GetOperand(kInsnSecondOpnd)); + newMop = MOP_wmovrr; + break; + } + case MOP_xiorri13r: { /* opnd1 is Reg64 and opnd2 is immediate. */ + opndOfOrr = &(insn.GetOperand(kInsnSecondOpnd)); + reg2 = &static_cast(insn.GetOperand(kInsnThirdOpnd)); + newMop = MOP_xmovrr; + break; + } + case MOP_xiorrri13: { /* opnd1 is reg64 and opnd3 is immediate. */ + opndOfOrr = &(insn.GetOperand(kInsnThirdOpnd)); + reg2 = &static_cast(insn.GetOperand(kInsnSecondOpnd)); + newMop = MOP_xmovrr; + break; + } + default: + break; + } + ASSERT(opndOfOrr->IsIntImmediate(), "expects immediate operand"); + immOpnd = static_cast(opndOfOrr); + if (immOpnd->GetValue() == 0) { + reg1 = &static_cast(insn.GetOperand(kInsnFirstOpnd)); + bb.ReplaceInsn(insn, cgFunc.GetCG()->BuildInstruction(newMop, *reg1, *reg2)); + } +} + +void ReplaceCmpToCmnAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + MOperator thisMop = insn.GetMachineOpcode(); + MOperator nextMop = MOP_undef; + MOperator newMop = MOP_undef; + switch (thisMop) { + case MOP_xmovri32: { + nextMop = MOP_wcmprr; + newMop = MOP_wcmnri; + break; + } + case MOP_xmovri64: { + nextMop = MOP_xcmprr; + newMop = MOP_xcmnri; + break; + } + default: + break; + } + Operand *opnd1OfMov = &(insn.GetOperand(kInsnFirstOpnd)); + Operand *opnd2OfMov = &(insn.GetOperand(kInsnSecondOpnd)); + if (opnd2OfMov->IsIntImmediate()) { + ImmOperand *immOpnd = static_cast(opnd2OfMov); + int64 iVal = immOpnd->GetValue(); + if (kNegativeImmLowerLimit <= iVal && iVal < 0) { + Insn *nextInsn = insn.GetNextMachineInsn(); /* get the next insn to judge if it is a cmp instruction. */ + if (nextInsn != nullptr) { + if (nextInsn->GetMachineOpcode() == nextMop) { + Operand *opndCmp2 = &(nextInsn->GetOperand(kInsnSecondOpnd)); + Operand *opndCmp3 = &(nextInsn->GetOperand(kInsnThirdOpnd)); /* get the third operand of cmp */ + /* if the first operand of mov equals the third operand of cmp, match the pattern. 
*/ + if (opnd1OfMov == opndCmp3) { + ImmOperand &newOpnd = aarch64CGFunc->CreateImmOperand(iVal * (-1), immOpnd->GetSize(), false); + Operand ®Flag = nextInsn->GetOperand(kInsnFirstOpnd); + bb.ReplaceInsn(*nextInsn, cgFunc.GetCG()->BuildInstruction(MOperator(newMop), regFlag, + *opndCmp2, newOpnd)); + } + } + } + } + } +} + +void RemoveIncRefAArch64::Run(BB &bb, Insn &insn) { + MOperator mOp = insn.GetMachineOpcode(); + if (mOp != MOP_xbl) { + return; + } + auto &target = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (target.GetName() != "MCC_IncDecRef_NaiveRCFast") { + return; + } + Insn *insnMov2 = insn.GetPreviousMachineInsn(); + if (insnMov2 == nullptr) { + return; + } + MOperator mopMov2 = insnMov2->GetMachineOpcode(); + if (mopMov2 != MOP_xmovrr) { + return; + } + Insn *insnMov1 = insnMov2->GetPreviousMachineInsn(); + if (insnMov1 == nullptr) { + return; + } + MOperator mopMov1 = insnMov1->GetMachineOpcode(); + if (mopMov1 != MOP_xmovrr) { + return; + } + if (static_cast(insnMov1->GetOperand(kInsnSecondOpnd)).GetRegisterNumber() != + static_cast(insnMov2->GetOperand(kInsnSecondOpnd)).GetRegisterNumber()) { + return; + } + auto &mov2Dest = static_cast(insnMov2->GetOperand(kInsnFirstOpnd)); + auto &mov1Dest = static_cast(insnMov1->GetOperand(kInsnFirstOpnd)); + if (mov1Dest.IsVirtualRegister() || mov2Dest.IsVirtualRegister() || mov1Dest.GetRegisterNumber() != R0 || + mov2Dest.GetRegisterNumber() != R1) { + return; + } + bb.RemoveInsn(insn); + bb.RemoveInsn(*insnMov2); + bb.RemoveInsn(*insnMov1); + bb.SetKind(BB::kBBFallthru); +} + +bool LongIntCompareWithZAArch64::FindLondIntCmpWithZ(std::vector &optInsn, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + optInsn.clear(); + /* first */ + if (thisMop != MOP_xcmpri) { + return false; + } + optInsn.push_back(&insn); + + /* second */ + Insn *nextInsn1 = insn.GetNextMachineInsn(); + if (nextInsn1 == nullptr) { + return false; + } + MOperator nextMop1 = nextInsn1->GetMachineOpcode(); + if (nextMop1 != MOP_wcsinvrrrc) { + return false; + } + optInsn.push_back(nextInsn1); + + /* third */ + Insn *nextInsn2 = nextInsn1->GetNextMachineInsn(); + if (nextInsn2 == nullptr) { + return false; + } + MOperator nextMop2 = nextInsn2->GetMachineOpcode(); + if (nextMop2 != MOP_wcsincrrrc) { + return false; + } + optInsn.push_back(nextInsn2); + + /* forth */ + Insn *nextInsn3 = nextInsn2->GetNextMachineInsn(); + if (nextInsn3 == nullptr) { + return false; + } + MOperator nextMop3 = nextInsn3->GetMachineOpcode(); + if (nextMop3 != MOP_wcmpri) { + return false; + } + optInsn.push_back(nextInsn3); + return true; +} + +bool LongIntCompareWithZAArch64::IsPatternMatch(const std::vector &optInsn) { + constexpr int insnLen = 4; + if (optInsn.size() != insnLen) { + return false; + } + int insnNum = 0; + Insn *insn1 = optInsn[insnNum]; + Insn *insn2 = optInsn[++insnNum]; + Insn *insn3 = optInsn[++insnNum]; + Insn *insn4 = optInsn[++insnNum]; + ASSERT(insnNum == 3, " this specific case has three insns"); + if (insn2->GetOperand(kInsnSecondOpnd).IsZeroRegister() && insn2->GetOperand(kInsnThirdOpnd).IsZeroRegister() && + insn3->GetOperand(kInsnThirdOpnd).IsZeroRegister() && + &(insn3->GetOperand(kInsnFirstOpnd)) == &(insn3->GetOperand(kInsnSecondOpnd)) && + static_cast(insn2->GetOperand(kInsnFourthOpnd)).GetCode() == CC_GE && + static_cast(insn3->GetOperand(kInsnFourthOpnd)).GetCode() == CC_LE && + static_cast(insn1->GetOperand(kInsnThirdOpnd)).GetValue() == 0 && + static_cast(insn4->GetOperand(kInsnThirdOpnd)).GetValue() == 0) { + return true; + } + 
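  /*
   * Editor's note, illustrative only (not part of the original patch): the four insns checked above
   * are the usual lowering of a 64-bit three-way compare against zero, e.g.
   *   cmp   x0, #0
   *   csinv w1, wzr, wzr, GE
   *   csinc w1, w1, wzr, LE
   *   cmp   w1, #0
   * Run below keeps only the first compare, rebuilding it in place of the final cmp; the register
   * names are examples.
   */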
return false; +} + +void LongIntCompareWithZAArch64::Run(BB &bb, Insn &insn) { + std::vector optInsn; + /* found pattern */ + if (FindLondIntCmpWithZ(optInsn, insn) && IsPatternMatch(optInsn)) { + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(optInsn[0]->GetMachineOpcode(), + optInsn[0]->GetOperand(kInsnFirstOpnd), + optInsn[0]->GetOperand(kInsnSecondOpnd), + optInsn[0]->GetOperand(kInsnThirdOpnd)); + /* use newInsn to replace the third optInsn */ + bb.ReplaceInsn(*optInsn[3], newInsn); + optInsn.clear(); + } +} + +void ComplexMemOperandAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_xadrpl12) { + return; + } + MOperator nextMop = nextInsn->GetMachineOpcode(); + if (nextMop && + ((nextMop >= MOP_wldrsb && nextMop <= MOP_dldp) || (nextMop >= MOP_wstrb && nextMop <= MOP_dstp))) { + /* Check if base register of nextInsn and the dest operand of insn are identical. */ + AArch64MemOperand *memOpnd = static_cast(nextInsn->GetMemOpnd()); + ASSERT(memOpnd != nullptr, "memOpnd is null in AArch64Peep::ComplexMemOperandAArch64"); + + /* Only for AddrMode_B_OI addressing mode. */ + if (memOpnd->GetAddrMode() != AArch64MemOperand::kAddrModeBOi) { + return; + } + + /* Only for intact memory addressing. */ + if (!memOpnd->IsIntactIndexed()) { + return; + } + + auto ®Opnd = static_cast(insn.GetOperand(kInsnFirstOpnd)); + + /* Check if dest operand of insn is idential with base register of nextInsn. */ + if (memOpnd->GetBaseRegister() != ®Opnd) { + return; + } + + /* Check if x0 is used after ldr insn, and if it is in live-out. */ + if (IfOperandIsLiveAfterInsn(regOpnd, *nextInsn)) { + return; + } + + auto &stImmOpnd = static_cast(insn.GetOperand(kInsnThirdOpnd)); + AArch64OfstOperand &offOpnd = aarch64CGFunc->GetOrCreateOfstOpnd( + stImmOpnd.GetOffset() + memOpnd->GetOffsetImmediate()->GetOffsetValue(), k32BitSize); + auto &newBaseOpnd = static_cast(insn.GetOperand(kInsnSecondOpnd)); + AArch64MemOperand &newMemOpnd = + aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeLo12Li, memOpnd->GetSize(), + &newBaseOpnd, nullptr, &offOpnd, stImmOpnd.GetSymbol()); + + nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + bb.RemoveInsn(insn); + CHECK_FATAL(!CGOptions::IsLazyBinding() || cgFunc.GetCG()->IsLibcore(), + "this pattern can't be found in this phase"); + } +} + +void ComplexMemOperandPreAddAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_xaddrrr && thisMop != MOP_waddrrr) { + return; + } + MOperator nextMop = nextInsn->GetMachineOpcode(); + if (nextMop && + ((nextMop >= MOP_wldrsb && nextMop <= MOP_dldr) || (nextMop >= MOP_wstrb && nextMop <= MOP_dstr))) { + if (!IsMemOperandOptPattern(insn, *nextInsn)) { + return; + } + AArch64MemOperand *memOpnd = static_cast(nextInsn->GetMemOpnd()); + auto &newBaseOpnd = static_cast(insn.GetOperand(kInsnSecondOpnd)); + auto &newIndexOpnd = static_cast(insn.GetOperand(kInsnThirdOpnd)); + if (newBaseOpnd.GetSize() != k64BitSize) { + return; + } + if (newIndexOpnd.GetSize() <= k32BitSize) { + AArch64MemOperand &newMemOpnd = + aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, memOpnd->GetSize(), &newBaseOpnd, + &newIndexOpnd, 0, false); + 
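      /*
       * Editor's note, illustrative only (not part of the original patch): this rewrite folds the
       * address computation into the load/store, roughly
       *   add x0, x1, x2
       *   ldr w3, [x0]
       * =>
       *   ldr w3, [x1, x2]
       * subject to the checks done above via IsMemOperandOptPattern; the register names are examples.
       */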
nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + } else { + AArch64MemOperand &newMemOpnd = + aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, memOpnd->GetSize(), &newBaseOpnd, + &newIndexOpnd, nullptr, nullptr); + nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + } + bb.RemoveInsn(insn); + } +} + +void ComplexMemOperandLSLAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_xaddrrrs) { + return; + } + MOperator nextMop = nextInsn->GetMachineOpcode(); + if (nextMop && + ((nextMop >= MOP_wldrsb && nextMop <= MOP_dldr) || (nextMop >= MOP_wstrb && nextMop <= MOP_dstr))) { + /* Check if base register of nextInsn and the dest operand of insn are identical. */ + AArch64MemOperand *memOpnd = static_cast(nextInsn->GetMemOpnd()); + ASSERT(memOpnd != nullptr, "null ptr check"); + + /* Only for AddrMode_B_OI addressing mode. */ + if (memOpnd->GetAddrMode() != AArch64MemOperand::kAddrModeBOi) { + return; + } + + /* Only for immediate is 0. */ + if (memOpnd->GetOffsetImmediate()->GetOffsetValue() != 0) { + return; + } + + /* Only for intact memory addressing. */ + if (!memOpnd->IsIntactIndexed()) { + return; + } + + auto ®Opnd = static_cast(insn.GetOperand(kInsnFirstOpnd)); + + /* Check if dest operand of insn is idential with base register of nextInsn. */ + if (memOpnd->GetBaseRegister() != ®Opnd) { + return; + } + +#ifdef USE_32BIT_REF + if (nextInsn->IsAccessRefField() && nextInsn->GetOperand(kInsnFirstOpnd).GetSize() > k32BitSize) { + return; + } +#endif + + /* Check if x0 is used after ldr insn, and if it is in live-out. */ + if (IfOperandIsLiveAfterInsn(regOpnd, *nextInsn)) { + return; + } + auto &lsl = static_cast(insn.GetOperand(kInsnFourthOpnd)); + /* check if shift amount is valid */ + if ((memOpnd->GetSize() == k32BitSize && (lsl.GetShiftAmount() != 0 && lsl.GetShiftAmount() != 2)) || + (memOpnd->GetSize() == k64BitSize && (lsl.GetShiftAmount() != 0 && lsl.GetShiftAmount() != 3))) { + return; + } + + auto &newBaseOpnd = static_cast(insn.GetOperand(kInsnSecondOpnd)); + auto &newIndexOpnd = static_cast(insn.GetOperand(kInsnThirdOpnd)); + AArch64MemOperand &newMemOpnd = + aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, memOpnd->GetSize(), &newBaseOpnd, + &newIndexOpnd, lsl.GetShiftAmount(), false); + nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + bb.RemoveInsn(insn); + } +} + + +void ComplexMemOperandLabelAArch64::Run(BB &bb, Insn &insn) { + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_xldli) { + return; + } + MOperator nextMop = nextInsn->GetMachineOpcode(); + if (nextMop != MOP_xvmovdr) { + return; + } + auto ®Opnd = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (regOpnd.GetRegisterNumber() != + static_cast(nextInsn->GetOperand(kInsnSecondOpnd)).GetRegisterNumber()) { + return; + } + + /* Check if x0 is used after ldr insn, and if it is in live-out. 
*/ + if (IfOperandIsLiveAfterInsn(regOpnd, *nextInsn)) { + return; + } + + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(MOP_dldli, nextInsn->GetOperand(kInsnFirstOpnd), + insn.GetOperand(kInsnSecondOpnd)); + bb.InsertInsnAfter(*nextInsn, newInsn); + bb.RemoveInsn(insn); + bb.RemoveInsn(*nextInsn); +} + +/* + * mov R0, vreg1 / R0 -> objDesignateInsn + * add vreg2, vreg1, #imm -> fieldDesignateInsn + * mov R1, vreg2 -> fieldParamDefInsn + * mov R2, vreg3 -> fieldValueDefInsn + */ +bool WriteFieldCallAArch64::WriteFieldCallOptPatternMatch(const Insn &writeFieldCallInsn, WriteRefFieldParam ¶m, + std::vector ¶mDefInsns) { + Insn *fieldValueDefInsn = writeFieldCallInsn.GetPreviousMachineInsn(); + if (fieldValueDefInsn == nullptr || fieldValueDefInsn->GetMachineOpcode() != MOP_xmovrr) { + return false; + } + Operand &fieldValueDefInsnDestOpnd = fieldValueDefInsn->GetOperand(kInsnFirstOpnd); + auto &fieldValueDefInsnDestReg = static_cast(fieldValueDefInsnDestOpnd); + if (fieldValueDefInsnDestReg.GetRegisterNumber() != R2) { + return false; + } + paramDefInsns.push_back(fieldValueDefInsn); + param.fieldValue = &(fieldValueDefInsn->GetOperand(kInsnSecondOpnd)); + Insn *fieldParamDefInsn = fieldValueDefInsn->GetPreviousMachineInsn(); + if (fieldParamDefInsn == nullptr || fieldParamDefInsn->GetMachineOpcode() != MOP_xmovrr) { + return false; + } + Operand &fieldParamDestOpnd = fieldParamDefInsn->GetOperand(kInsnFirstOpnd); + auto &fieldParamDestReg = static_cast(fieldParamDestOpnd); + if (fieldParamDestReg.GetRegisterNumber() != R1) { + return false; + } + paramDefInsns.push_back(fieldParamDefInsn); + Insn *fieldDesignateInsn = fieldParamDefInsn->GetPreviousMachineInsn(); + if (fieldDesignateInsn == nullptr || fieldDesignateInsn->GetMachineOpcode() != MOP_xaddrri12) { + return false; + } + Operand &fieldParamDefSrcOpnd = fieldParamDefInsn->GetOperand(kInsnSecondOpnd); + Operand &fieldDesignateDestOpnd = fieldDesignateInsn->GetOperand(kInsnFirstOpnd); + if (!RegOperand::IsSameReg(fieldParamDefSrcOpnd, fieldDesignateDestOpnd)) { + return false; + } + Operand &fieldDesignateBaseOpnd = fieldDesignateInsn->GetOperand(kInsnSecondOpnd); + param.fieldBaseOpnd = &(static_cast(fieldDesignateBaseOpnd)); + auto &immOpnd = static_cast(fieldDesignateInsn->GetOperand(kInsnThirdOpnd)); + param.fieldOffset = immOpnd.GetValue(); + paramDefInsns.push_back(fieldDesignateInsn); + Insn *objDesignateInsn = fieldDesignateInsn->GetPreviousMachineInsn(); + if (objDesignateInsn == nullptr || objDesignateInsn->GetMachineOpcode() != MOP_xmovrr) { + return false; + } + Operand &objDesignateDestOpnd = objDesignateInsn->GetOperand(kInsnFirstOpnd); + auto &objDesignateDestReg = static_cast(objDesignateDestOpnd); + if (objDesignateDestReg.GetRegisterNumber() != R0) { + return false; + } + Operand &objDesignateSrcOpnd = objDesignateInsn->GetOperand(kInsnSecondOpnd); + if (RegOperand::IsSameReg(objDesignateDestOpnd, objDesignateSrcOpnd) || + !RegOperand::IsSameReg(objDesignateSrcOpnd, fieldDesignateBaseOpnd)) { + return false; + } + param.objOpnd = &(objDesignateInsn->GetOperand(kInsnSecondOpnd)); + paramDefInsns.push_back(objDesignateInsn); + return true; +} + +bool WriteFieldCallAArch64::IsWriteRefFieldCallInsn(const Insn &insn) { + if (!insn.IsCall() || insn.IsIndirectCall()) { + return false; + } + Operand *targetOpnd = insn.GetCallTargetOperand(); + ASSERT(targetOpnd != nullptr, "targetOpnd must not be nullptr"); + if (!targetOpnd->IsFuncNameOpnd()) { + return false; + } + FuncNameOperand *target = static_cast(targetOpnd); + 
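  /*
   * Editor's note, illustrative only (not part of the original patch): when two consecutive
   * MCC_WriteRefField/MCC_WriteVolatileField calls on the same object are matched (with nothing
   * that may throw in between), Run below keeps the first call and turns the second into a plain
   * store of the field value, roughly
   *   str xVal, [xObj, #fieldOffset]   // marked as a reference-field access
   * and deletes the mov/add insns that only set up R0/R1/R2 for that second call. xVal/xObj are
   * placeholders; the real operands come from the matched WriteRefFieldParam.
   */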
const MIRSymbol *funcSt = target->GetFunctionSymbol(); + ASSERT(funcSt->GetSKind() == kStFunc, "the kind of funcSt is unreasonable"); + const std::string &funcName = funcSt->GetName(); + return funcName == "MCC_WriteRefField" || funcName == "MCC_WriteVolatileField"; +} + +static bool MayThrowBetweenInsn(const Insn &prevCallInsn, const Insn &currCallInsn) { + for (Insn *insn = prevCallInsn.GetNext(); insn != nullptr && insn != &currCallInsn; insn = insn->GetNext()) { + if (insn->MayThrow()) { + return true; + } + } + return false; +} + +void WriteFieldCallAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + std::vector paramDefInsns; + Insn *nextInsn = insn.GetNextMachineInsn(); + if (!IsWriteRefFieldCallInsn(insn)) { + return; + } + if (!hasWriteFieldCall) { + if (!WriteFieldCallOptPatternMatch(insn, firstCallParam, paramDefInsns)) { + return; + } + prevCallInsn = &insn; + hasWriteFieldCall = true; + return; + } + WriteRefFieldParam currentCallParam; + if (!WriteFieldCallOptPatternMatch(insn, currentCallParam, paramDefInsns)) { + return; + } + if (prevCallInsn == nullptr || MayThrowBetweenInsn(*prevCallInsn, insn)) { + return; + } + if (firstCallParam.objOpnd == nullptr || currentCallParam.objOpnd == nullptr || + currentCallParam.fieldBaseOpnd == nullptr) { + return; + } + if (!RegOperand::IsSameReg(*firstCallParam.objOpnd, *currentCallParam.objOpnd)) { + return; + } + MemOperand &addr = + aarch64CGFunc->CreateMemOpnd(*currentCallParam.fieldBaseOpnd, currentCallParam.fieldOffset, k64BitSize); + Insn &strInsn = cgFunc.GetCG()->BuildInstruction(MOP_xstr, *currentCallParam.fieldValue, addr); + strInsn.AppendComment("store reference field"); + strInsn.MarkAsAccessRefField(true); + bb.InsertInsnAfter(insn, strInsn); + for (Insn *paramDefInsn : paramDefInsns) { + bb.RemoveInsn(*paramDefInsn); + } + bb.RemoveInsn(insn); + prevCallInsn = &strInsn; + nextInsn = strInsn.GetNextMachineInsn(); +} + +void RemoveDecRefAArch64::Run(BB &bb, Insn &insn) { + if (insn.GetMachineOpcode() != MOP_xbl) { + return; + } + auto &target = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (target.GetName() != "MCC_DecRef_NaiveRCFast") { + return; + } + Insn *insnMov = insn.GetPreviousMachineInsn(); + if (insnMov == nullptr) { + return; + } + MOperator mopMov = insnMov->GetMachineOpcode(); + if ((mopMov != MOP_xmovrr && mopMov != MOP_xmovri64) || + static_cast(insnMov->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() != R0) { + return; + } + Operand &srcOpndOfMov = insnMov->GetOperand(kInsnSecondOpnd); + if (!srcOpndOfMov.IsZeroRegister() && + !(srcOpndOfMov.IsImmediate() && static_cast(srcOpndOfMov).GetValue() == 0)) { + return; + } + bb.RemoveInsn(*insnMov); + bb.RemoveInsn(insn); + bb.SetKind(BB::kBBFallthru); +} + +/* + * Find 5 insn with certain OP code + * 1 : MOP_xaddrri12 + * 2 : MOP_waddrrr + * 3 : MOP_waddrri12 + * 4 : MOP_xsxtw64 + * 5 : MOP_xaddrrrs + */ +bool ComputationTreeAArch64::FindComputationTree(std::vector &optInsn, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + optInsn.clear(); + /* first */ + if (thisMop != MOP_xaddrri12) { + return false; + } + optInsn.push_back(&insn); + /* second */ + Insn *nextInsn1 = insn.GetNextMachineInsn(); + if (nextInsn1 == nullptr) { + return false; + } + MOperator nextMop1 = nextInsn1->GetMachineOpcode(); + if (nextMop1 != MOP_waddrrr) { + return false; + } + optInsn.push_back(nextInsn1); + /* third */ + Insn *nextInsn2 = nextInsn1->GetNextMachineInsn(); + if (nextInsn2 == nullptr) { + return false; + } + 
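  /*
   * Editor's note, illustrative only (not part of the original patch), on RemoveDecRefAArch64::Run
   * above: it removes a reference-count decrement whose argument is statically zero, e.g.
   *   mov x0, xzr            (or mov x0, #0)
   *   bl  MCC_DecRef_NaiveRCFast
   * since decrementing a null reference is a no-op, and then marks the block as fall-through.
   */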
  MOperator nextMop2 = nextInsn2->GetMachineOpcode();
+  if (nextMop2 != MOP_waddrri12) {
+    return false;
+  }
+  optInsn.push_back(nextInsn2);
+  /* fourth */
+  Insn *nextInsn3 = nextInsn2->GetNextMachineInsn();
+  if (nextInsn3 == nullptr) {
+    return false;
+  }
+  MOperator nextMop3 = nextInsn3->GetMachineOpcode();
+  if (nextMop3 != MOP_xsxtw64) {
+    return false;
+  }
+  optInsn.push_back(nextInsn3);
+  /* fifth */
+  Insn *nextInsn4 = nextInsn3->GetNextMachineInsn();
+  if (nextInsn4 == nullptr) {
+    return false;
+  }
+  MOperator nextMop4 = nextInsn4->GetMachineOpcode();
+  if (nextMop4 != MOP_xaddrrrs) {
+    return false;
+  }
+  optInsn.push_back(nextInsn4);
+  return true;
+}
+
+/*
+ * Make sure the insns in optInsn match the following pattern:
+ * add x1, x1, #16
+ * add w2, w10, w10
+ * add w2, w2, #1
+ * sxtw x2, w2
+ * add x1, x1, x2, LSL #3
+ * bl MCC_LoadRefField_NaiveRCFast
+ */
+bool ComputationTreeAArch64::IsPatternMatch(const std::vector &optInsn) const {
+  /* this specific pattern has exactly five insns */
+  if (optInsn.size() <= 4) {
+    ERR(kLncErr, "access opt_insn failed");
+    return false;
+  }
+  int insnNum = 0;
+  Insn *insn1 = optInsn[insnNum];
+  Insn *insn2 = optInsn[++insnNum];
+  Insn *insn3 = optInsn[++insnNum];
+  Insn *insn4 = optInsn[++insnNum];
+  Insn *insn5 = optInsn[++insnNum];
+  ASSERT(insnNum == 4, "match pattern failed in AArch64Peep::PatternIsMatch");
+  Insn *insn6 = insn5->GetNext();
+  if (insn6 != nullptr && insn6->GetMachineOpcode() != MOP_xbl && insn6->GetMachineOpcode() != MOP_tail_call_opt_xbl) {
+    return false;
+  }
+  CHECK_FATAL(insn6 != nullptr, "Insn null ptr check");
+  auto &funcNameOpnd = static_cast(insn6->GetOperand(kInsnFirstOpnd));
+  if (&(insn1->GetOperand(kInsnFirstOpnd)) == &(insn5->GetOperand(kInsnSecondOpnd)) &&
+      &(insn2->GetOperand(kInsnSecondOpnd)) == &(insn2->GetOperand(kInsnThirdOpnd)) &&
+      &(insn2->GetOperand(kInsnFirstOpnd)) == &(insn3->GetOperand(kInsnSecondOpnd)) &&
+      &(insn3->GetOperand(kInsnFirstOpnd)) == &(insn4->GetOperand(kInsnSecondOpnd)) &&
+      &(insn4->GetOperand(kInsnFirstOpnd)) == &(insn5->GetOperand(kInsnThirdOpnd)) &&
+      funcNameOpnd.GetName() == "MCC_LoadRefField_NaiveRCFast" &&
+      static_cast(insn1->GetOperand(kInsnThirdOpnd)).GetValue() == k16BitSize &&
+      static_cast(insn3->GetOperand(kInsnThirdOpnd)).GetValue() == 1) {
+    return true;
+  }
+  return false;
+}
+
+void ComputationTreeAArch64::Run(BB &bb, Insn &insn) {
+  std::vector optInsn;
+  AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc);
+  if (!insn.IsMachineInstruction()) {
+    return;
+  }
+  /* found pattern */
+  if (FindComputationTree(optInsn, insn) && IsPatternMatch(optInsn)) {
+    Insn *sxtwInsn = optInsn[4];  // the pattern is guaranteed to contain five insns, so index 4 is valid
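    /*
     * Editor's note, illustrative only (not part of the original patch): for the matched sequence
     *   add x1, x1, #16
     *   add w2, w10, w10
     *   add w2, w2, #1
     *   sxtw x2, w2
     *   add x1, x1, x2, LSL #3
     * the code below rebuilds x1 as
     *   add x1, <old x1>, w10, SXTW #4   // the doubling of w10 is folded into the shift
     *   add x1, x1, #24                  // 16 + (1 << 3), i.e. the "+1" scaled by LSL #3 plus the #16
     * matching the lslShiftAmountCaseA branch (shift 3 -> SXTW 4, immediate 24).
     */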
+ CHECK_FATAL(sxtwInsn->GetOperand(kInsnFourthOpnd).GetKind() == Operand::kOpdShift, "should not happened"); + auto &lsl = static_cast(sxtwInsn->GetOperand(kInsnFourthOpnd)); + Operand *sxtw = nullptr; + Operand *imm = nullptr; + int32 lslBitLenth = 3; + uint32 lslShiftAmountCaseA = 3; + uint32 lslShiftAmountCaseB = 2; + int32 oriAddEnd = 16; + if (lsl.GetShiftAmount() == lslShiftAmountCaseA) { + sxtw = &aarch64CGFunc->CreateExtendShiftOperand(ExtendShiftOperand::kSXTW, + lslShiftAmountCaseA + 1, lslBitLenth); + imm = &aarch64CGFunc->CreateImmOperand(oriAddEnd + (1ULL << lslShiftAmountCaseA), + kMaxAarch64ImmVal12Bits, true); + } else if (lsl.GetShiftAmount() == lslShiftAmountCaseB) { + sxtw = &aarch64CGFunc->CreateExtendShiftOperand(ExtendShiftOperand::kSXTW, + lslShiftAmountCaseB + 1, lslBitLenth); + imm = &aarch64CGFunc->CreateImmOperand(oriAddEnd + (1ULL << lslShiftAmountCaseB), + kMaxAarch64ImmVal12Bits, true); + } + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(MOP_xxwaddrrre, + sxtwInsn->GetOperand(kInsnFirstOpnd), + optInsn[0]->GetOperand(kInsnSecondOpnd), + optInsn[1]->GetOperand(kInsnSecondOpnd), *sxtw); + bb.ReplaceInsn(*sxtwInsn, newInsn); + Insn &newAdd = + cgFunc.GetCG()->BuildInstruction(MOP_xaddrri12, sxtwInsn->GetOperand(kInsnFirstOpnd), + sxtwInsn->GetOperand(kInsnFirstOpnd), *imm); + (void)bb.InsertInsnAfter(newInsn, newAdd); + optInsn.clear(); + } +} + +/* + * We optimize the following pattern in this function: + * and x1, x1, #imm (is n power of 2) + * cbz/cbnz x1, .label + * => + * and x1, x1, #imm (is n power of 2) + * tbnz/tbz x1, #n, .label + */ +void OneHoleBranchesAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + if (&insn != bb.GetLastInsn()) { + return; + } + /* check cbz/cbnz insn */ + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_wcbz && thisMop != MOP_wcbnz && thisMop != MOP_xcbz && thisMop != MOP_xcbnz) { + return; + } + /* check and insn */ + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr) { + return; + } + MOperator prevMop = prevInsn->GetMachineOpcode(); + if (prevMop != MOP_wandrri12 && prevMop != MOP_xandrri13) { + return; + } + /* check opearnd of two insns */ + if (&(prevInsn->GetOperand(kInsnFirstOpnd)) != &(insn.GetOperand(kInsnFirstOpnd))) { + return; + } + auto &imm = static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + int n = logValueAtBase2(imm.GetValue()); + if (n < 0) { + return; + } + + /* replace insn */ + auto &label = static_cast(insn.GetOperand(kInsnSecondOpnd)); + MOperator newOp = MOP_undef; + switch (thisMop) { + case MOP_wcbz: + newOp = MOP_wtbz; + break; + case MOP_wcbnz: + newOp = MOP_wtbnz; + break; + case MOP_xcbz: + newOp = MOP_xtbz; + break; + case MOP_xcbnz: + newOp = MOP_xtbnz; + break; + default: + CHECK_FATAL(false, "can not touch here"); + break; + } + ImmOperand &oneHoleOpnd = aarch64CGFunc->CreateImmOperand(n, k8BitSize, false); + (void)bb.InsertInsnAfter(insn, cgFunc.GetCG()->BuildInstruction( + newOp, prevInsn->GetOperand(kInsnSecondOpnd), oneHoleOpnd, label)); + bb.RemoveInsn(insn); +} + +void ReplaceIncDecWithIncAArch64::Run(BB &bb, Insn &insn) { + if (insn.GetMachineOpcode() != MOP_xbl) { + return; + } + auto &target = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (target.GetName() != "MCC_IncDecRef_NaiveRCFast") { + return; + } + Insn *insnMov = insn.GetPreviousMachineInsn(); + if (insnMov == nullptr) { + return; + } + MOperator mopMov = insnMov->GetMachineOpcode(); + if (mopMov != MOP_xmovrr) { + return; + } + if 
(static_cast(insnMov->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() != R1 || + !insnMov->GetOperand(kInsnSecondOpnd).IsZeroRegister()) { + return; + } + std::string funcName = "MCC_IncRef_NaiveRCFast"; + GStrIdx strIdx = GlobalTables::GetStrTable().GetStrIdxFromName(funcName); + MIRSymbol *st = GlobalTables::GetGsymTable().GetSymbolFromStrIdx(strIdx, true); + if (st == nullptr) { + LogInfo::MapleLogger() << "WARNING: Replace IncDec With Inc fail due to no MCC_IncRef_NaiveRCFast func\n"; + return; + } + bb.RemoveInsn(*insnMov); + target.SetFunctionSymbol(*st); +} + + +void AndCmpBranchesToTbzAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + if (&insn != bb.GetLastInsn()) { + return; + } + MOperator mopB = insn.GetMachineOpcode(); + if (mopB != MOP_beq && mopB != MOP_bne) { + return; + } + auto &label = static_cast(insn.GetOperand(kInsnSecondOpnd)); + /* get the instruction before bne/beq, expects its type is cmp. */ + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr) { + return; + } + MOperator prevMop = prevInsn->GetMachineOpcode(); + if (prevMop != MOP_wcmpri && prevMop != MOP_xcmpri) { + return; + } + + /* get the instruction before "cmp", expect its type is "and". */ + Insn *prevPrevInsn = prevInsn->GetPreviousMachineInsn(); + if (prevPrevInsn == nullptr) { + return; + } + MOperator mopAnd = prevPrevInsn->GetMachineOpcode(); + if (mopAnd != MOP_wandrri12 && mopAnd != MOP_xandrri13) { + return; + } + + /* + * check operand + * + * the real register of "cmp" and "and" must be the same. + */ + if (&(prevInsn->GetOperand(kInsnSecondOpnd)) != &(prevPrevInsn->GetOperand(kInsnFirstOpnd))) { + return; + } + + int opndIdx = 2; + if (!prevPrevInsn->GetOperand(opndIdx).IsIntImmediate() || !prevInsn->GetOperand(opndIdx).IsIntImmediate()) { + return; + } + auto &immAnd = static_cast(prevPrevInsn->GetOperand(opndIdx)); + auto &immCmp = static_cast(prevInsn->GetOperand(opndIdx)); + if (immCmp.GetValue() == 0) { + int n = logValueAtBase2(immAnd.GetValue()); + if (n < 0) { + return; + } + /* judge whether the flag_reg and "w0" is live later. */ + auto &flagReg = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + auto &cmpReg = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + if (FindRegLiveOut(flagReg, *prevInsn->GetBB()) || FindRegLiveOut(cmpReg, *prevInsn->GetBB())) { + return; + } + MOperator mopNew = MOP_undef; + switch (mopB) { + case MOP_beq: + if (mopAnd == MOP_wandrri12) { + mopNew = MOP_wtbz; + } else if (mopAnd == MOP_xandrri13) { + mopNew = MOP_xtbz; + } + break; + case MOP_bne: + if (mopAnd == MOP_wandrri12) { + mopNew = MOP_wtbnz; + } else if (mopAnd == MOP_xandrri13) { + mopNew = MOP_xtbnz; + } + break; + default: + CHECK_FATAL(false, "expects beq or bne insn"); + break; + } + ImmOperand &newImm = aarch64CGFunc->CreateImmOperand(n, k8BitSize, false); + (void)bb.InsertInsnAfter(insn, cgFunc.GetCG()->BuildInstruction(mopNew, + prevPrevInsn->GetOperand(kInsnSecondOpnd), newImm, label)); + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); + bb.RemoveInsn(*prevPrevInsn); + } else { + int n = logValueAtBase2(immAnd.GetValue()); + int m = logValueAtBase2(immCmp.GetValue()); + if (n < 0 || m < 0 || n != m) { + return; + } + /* judge whether the flag_reg and "w0" is live later. 
*/ + auto &flagReg = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + auto &cmpReg = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + if (FindRegLiveOut(flagReg, *prevInsn->GetBB()) || FindRegLiveOut(cmpReg, *prevInsn->GetBB())) { + return; + } + MOperator mopNew = MOP_undef; + switch (mopB) { + case MOP_beq: + if (mopAnd == MOP_wandrri12) { + mopNew = MOP_wtbnz; + } else if (mopAnd == MOP_xandrri13) { + mopNew = MOP_xtbnz; + } + break; + case MOP_bne: + if (mopAnd == MOP_wandrri12) { + mopNew = MOP_wtbz; + } else if (mopAnd == MOP_xandrri13) { + mopNew = MOP_xtbz; + } + break; + default: + CHECK_FATAL(false, "expects beq or bne insn"); + break; + } + ImmOperand &newImm = aarch64CGFunc->CreateImmOperand(n, k8BitSize, false); + (void)bb.InsertInsnAfter(insn, cgFunc.GetCG()->BuildInstruction(mopNew, + prevPrevInsn->GetOperand(kInsnSecondOpnd), newImm, label)); + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); + bb.RemoveInsn(*prevPrevInsn); + } +} +} /* namespace maplebe */ diff --git a/src/maple_be/src/cg/aarch64/aarch64_reaching.cpp b/src/maple_be/src/cg/aarch64/aarch64_reaching.cpp new file mode 100644 index 0000000000000000000000000000000000000000..188254489c64cf7190678760c60762d98885d127 --- /dev/null +++ b/src/maple_be/src/cg/aarch64/aarch64_reaching.cpp @@ -0,0 +1,942 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#include "aarch64_reaching.h" +#include "aarch64_cg.h" +#include "aarch64_operand.h" +namespace maplebe { +/* MCC_ClearLocalStackRef clear 1 stack slot, and MCC_DecRefResetPair clear 2 stack slot, + * the stack positins cleared are recorded in callInsn->clearStackOffset + */ +constexpr short kFirstClearMemIndex = 0; +constexpr short kSecondClearMemIndex = 1; + +/* insert pseudo insn for parameters definition */ +void AArch64ReachingDefinition::InitStartGen() { + BB *bb = cgFunc->GetFirstBB(); + + /* Parameters should be define first. */ + ParmLocator parmLocator(cgFunc->GetBecommon()); + PLocInfo pLoc; + for (uint32 i = 0; i < cgFunc->GetFunction().GetFormalCount(); ++i) { + MIRType *type = cgFunc->GetFunction().GetNthParamType(i); + parmLocator.LocateNextParm(*type, pLoc); + if (pLoc.reg0 == 0) { + /* If is a large frame, parameter addressing mode is based vreg:Vra. */ + continue; + } + + uint64 symSize = cgFunc->GetBecommon().GetTypeSize(type->GetTypeIndex()); + RegType regType = (pLoc.reg0 < V0) ? kRegTyInt : kRegTyFloat; + uint32 srcBitSize = ((symSize < k4ByteSize) ? 
k4ByteSize : symSize) * kBitsPerByte; + + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + RegOperand ®Opnd = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(pLoc.reg0, srcBitSize, regType); + + MOperator mOp; + if (regType == kRegTyInt) { + if (srcBitSize <= k32BitSize) { + mOp = MOP_pseudo_param_def_w; + } else { + mOp = MOP_pseudo_param_def_x; + } + } else { + if (srcBitSize <= k32BitSize) { + mOp = MOP_pseudo_param_def_s; + } else { + mOp = MOP_pseudo_param_def_d; + } + } + + Insn &pseudoInsn = cgFunc->GetCG()->BuildInstruction(mOp, regOpnd); + bb->InsertInsnBegin(pseudoInsn); + pseudoInsns.push_back(&pseudoInsn); + + { + /* + * define memory address since store param may be transfered to stp and which with the short offset range. + * we can not get the correct definition before RA. + * example: + * add x8, sp, #712 + * stp x0, x1, [x8] // store param: _this Reg40_R313644 + * stp x2, x3, [x8,#16] // store param: Reg41_R333743 Reg42_R333622 + * stp x4, x5, [x8,#32] // store param: Reg43_R401297 Reg44_R313834 + * str x7, [x8,#48] // store param: Reg46_R401297 + */ + MIRSymbol *sym = cgFunc->GetFunction().GetFormal(i); + if (!sym->IsPreg()) { + MIRSymbol *firstSym = cgFunc->GetFunction().GetFormal(i); + const AArch64SymbolAlloc *firstSymLoc = + static_cast(cgFunc->GetMemlayout()->GetSymAllocInfo(firstSym->GetStIndex())); + int32 stOffset = cgFunc->GetBaseOffset(*firstSymLoc); + MIRType *firstType = cgFunc->GetFunction().GetNthParamType(i); + uint32 firstSymSize = cgFunc->GetBecommon().GetTypeSize(firstType->GetTypeIndex()); + uint32 firstStackSize = firstSymSize < k4ByteSize ? k4ByteSize : firstSymSize; + + AArch64MemOperand *memOpnd = cgFunc->GetMemoryPool()->New(RFP, stOffset, + firstStackSize * kBitsPerByte); + MOperator mopTemp = firstStackSize <= k4ByteSize ? MOP_pseudo_param_store_w : MOP_pseudo_param_store_x; + Insn &pseudoInsnTemp = cgFunc->GetCG()->BuildInstruction(mopTemp, *memOpnd); + bb->InsertInsnBegin(pseudoInsnTemp); + pseudoInsns.push_back(&pseudoInsnTemp); + } + } + } + + /* if function has "bl MCC_InitializeLocalStackRef", should define corresponding memory. */ + AArch64CGFunc *a64CGFunc = static_cast(cgFunc); + + for (uint32 i = 0; i < a64CGFunc->GetRefCount(); ++i) { + AArch64MemOperand *memOpnd = cgFunc->GetMemoryPool()->New( + RFP, a64CGFunc->GetBeginOffset() + i * k8BitSize, k64BitSize); + Insn &pseudoInsn = cgFunc->GetCG()->BuildInstruction(MOP_pseudo_ref_init_x, *memOpnd); + + bb->InsertInsnBegin(pseudoInsn); + pseudoInsns.push_back(&pseudoInsn); + } +} + +/* insert pseudoInsns for ehBB, R0 and R1 are defined in pseudoInsns */ +void AArch64ReachingDefinition::InitEhDefine(BB &bb) { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + + /* Insert MOP_pseudo_eh_def_x R1. */ + RegOperand ®Opnd1 = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(R1, k64BitSize, kRegTyInt); + Insn &pseudoInsn = cgFunc->GetCG()->BuildInstruction(MOP_pseudo_eh_def_x, regOpnd1); + bb.InsertInsnBegin(pseudoInsn); + pseudoInsns.push_back(&pseudoInsn); + + /* insert MOP_pseudo_eh_def_x R0. 
*/ + RegOperand ®Opnd2 = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(R0, k64BitSize, kRegTyInt); + Insn &newPseudoInsn = cgFunc->GetCG()->BuildInstruction(MOP_pseudo_eh_def_x, regOpnd2); + bb.InsertInsnBegin(newPseudoInsn); + pseudoInsns.push_back(&newPseudoInsn); +} + +/* insert pseudoInsns for return value R0/V0 */ +void AArch64ReachingDefinition::AddRetPseudoInsn(BB &bb) { + AArch64reg regNO = static_cast(cgFunc)->GetReturnRegisterNumber(); + if (regNO == kInvalidRegNO) { + return; + } + + if (regNO == R0) { + RegOperand ®Opnd = + static_cast(cgFunc)->GetOrCreatePhysicalRegisterOperand(regNO, k64BitSize, kRegTyInt); + Insn &retInsn = cgFunc->GetCG()->BuildInstruction(MOP_pseudo_ret_int, regOpnd); + bb.AppendInsn(retInsn); + pseudoInsns.push_back(&retInsn); + } else { + ASSERT(regNO == V0, "CG internal error. Return value should be R0 or V0."); + RegOperand ®Opnd = + static_cast(cgFunc)->GetOrCreatePhysicalRegisterOperand(regNO, k64BitSize, kRegTyFloat); + Insn &retInsn = cgFunc->GetCG()->BuildInstruction(MOP_pseudo_ret_float, regOpnd); + bb.AppendInsn(retInsn); + pseudoInsns.push_back(&retInsn); + } +} + +void AArch64ReachingDefinition::AddRetPseudoInsns() { + uint32 exitBBSize = cgFunc->GetExitBBsVec().size(); + if (exitBBSize == 0) { + if (cgFunc->GetLastBB()->GetPrev()->GetFirstStmt() == cgFunc->GetCleanupLabel() && + cgFunc->GetLastBB()->GetPrev()->GetPrev()) { + AddRetPseudoInsn(*cgFunc->GetLastBB()->GetPrev()->GetPrev()); + } else { + AddRetPseudoInsn(*cgFunc->GetLastBB()->GetPrev()); + } + } else { + for (uint32 i = 0; i < exitBBSize; ++i) { + AddRetPseudoInsn(*cgFunc->GetExitBB(i)); + } + } +} + +/* all caller saved register are modified by call insn */ +void AArch64ReachingDefinition::GenAllCallerSavedRegs(BB &bb) { + for (uint32 i = R0; i <= V31; ++i) { + if (IsCallerSavedReg(i)) { + regGen[bb.GetId()]->SetBit(i); + } + } +} + +/* + * find definition for register between startInsn and endInsn. 
+ * startInsn and endInsn must be in same BB and startInsn and endInsn are included + */ +std::vector AArch64ReachingDefinition::FindRegDefBetweenInsn(uint32 regNO, Insn *startInsn, + Insn *endInsn) const { + std::vector defInsnVec; + if (startInsn == nullptr || endInsn == nullptr) { + return defInsnVec; + } + + ASSERT(startInsn->GetBB() == endInsn->GetBB(), "two insns must be in a same BB"); + ASSERT(endInsn->GetId() >= startInsn->GetId(), "two insns must be in a same BB"); + if (!regGen[startInsn->GetBB()->GetId()]->TestBit(regNO)) { + return defInsnVec; + } + + for (Insn *insn = endInsn; insn != nullptr && insn != startInsn->GetPrev(); insn = insn->GetPrev()) { + if (!insn->IsMachineInstruction()) { + continue; + } + + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + if (insn->IsCall() && IsCallerSavedReg(regNO)) { + defInsnVec.push_back(insn); + return defInsnVec; + } + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + bool isDef = regProp->IsDef(); + if (!isDef && !opnd.IsMemoryAccessOperand()) { + continue; + } + + if (opnd.IsList()) { + CHECK_FATAL(false, "Internal error, list operand should not be defined."); + } else if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + + if (base != nullptr) { + if (memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi && + (memOpnd.IsPostIndexed() || memOpnd.IsPreIndexed()) && + base->GetRegisterNumber() == regNO) { + defInsnVec.push_back(insn); + return defInsnVec; + } + } + } else if ((opnd.IsConditionCode() || opnd.IsRegister()) && + (static_cast(opnd).GetRegisterNumber() == regNO)) { + defInsnVec.push_back(insn); + return defInsnVec; + } + } + } + return defInsnVec; +} + +void AArch64ReachingDefinition::FindRegDefInBB(uint32 regNO, BB &bb, InsnSet &defInsnSet) const { + if (!regGen[bb.GetId()]->TestBit(regNO)) { + return; + } + + FOR_BB_INSNS(insn, (&bb)) { + if (!insn->IsMachineInstruction()) { + continue; + } + + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + if (insn->IsCall() && IsCallerSavedReg(regNO)) { + defInsnSet.insert(insn); + continue; + } + + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->GetOperand(i)); + bool isDef = regProp->IsDef(); + if (!isDef && !opnd.IsMemoryAccessOperand()) { + continue; + } + + if (opnd.IsList()) { + ASSERT(false, "Internal error, list operand should not be defined."); + } else if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + + if (base != nullptr) { + if (memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi && + (memOpnd.IsPostIndexed() || memOpnd.IsPreIndexed()) && + base->GetRegisterNumber() == regNO) { + defInsnSet.insert(insn); + } + } + } else if ((opnd.IsConditionCode() || opnd.IsRegister()) && + (static_cast(opnd).GetRegisterNumber() == regNO)) { + defInsnSet.insert(insn); + } + } + } +} + +/* check whether call insn changed the stack status or not. 
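+ * a call is treated as defining a stack slot when the queried offset matches one of the clear-stack offsets recorded on the call insn.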
*/ +bool AArch64ReachingDefinition::CallInsnClearDesignateStackRef(const Insn &callInsn, int64 offset) const { + return offset == callInsn.GetClearStackOffset(kFirstClearMemIndex) || + offset == callInsn.GetClearStackOffset(kSecondClearMemIndex); +} + +/* + * find definition for stack memory operand between startInsn and endInsn. + * startInsn and endInsn must be in same BB and startInsn and endInsn are included + * special case: + * MCC_ClearLocalStackRef clear designate stack position, the designate stack position is thought defined + * for example: + * add x0, x29, #24 + * bl MCC_ClearLocalStackRef + */ +std::vector AArch64ReachingDefinition::FindMemDefBetweenInsn(uint32 offset, const Insn *startInsn, + Insn *endInsn) const { + std::vector defInsnVec; + if (startInsn == nullptr || endInsn == nullptr) { + return defInsnVec; + } + + ASSERT(startInsn->GetBB() == endInsn->GetBB(), "two insns must be in a same BB"); + ASSERT(endInsn->GetId() >= startInsn->GetId(), "two insns must be in a same BB"); + if (!memGen[startInsn->GetBB()->GetId()]->TestBit(offset / kMemZoomSize)) { + return defInsnVec; + } + + for (Insn *insn = endInsn; insn != nullptr && insn != startInsn->GetPrev(); insn = insn->GetPrev()) { + if (!insn->IsMachineInstruction()) { + continue; + } + + if (insn->IsCall()) { + if (CallInsnClearDesignateStackRef(*insn, offset)) { + defInsnVec.push_back(insn); + return defInsnVec; + } + continue; + } + + if (!(insn->IsStore() || insn->IsPseudoInstruction())) { + continue; + } + + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + RegOperand *index = memOpnd.GetIndexRegister(); + + if (base == nullptr || !IsFrameReg(*base) || index != nullptr) { + break; + } + + ASSERT(memOpnd.GetOffsetImmediate() != nullptr, "offset must be a immediate value"); + int64 memOffset = memOpnd.GetOffsetImmediate()->GetOffsetValue(); + if ((offset == memOffset) || + (insn->IsStorePair() && offset == memOffset + GetEachMemSizeOfPair(insn->GetMachineOpcode()))) { + defInsnVec.push_back(insn); + return defInsnVec; + } + } + } + } + return defInsnVec; +} + +void AArch64ReachingDefinition::FindMemDefInBB(uint32 offset, BB &bb, InsnSet &defInsnSet) const { + if (!memGen[bb.GetId()]->TestBit(offset / kMemZoomSize)) { + return; + } + + FOR_BB_INSNS(insn, (&bb)) { + if (!insn->IsMachineInstruction()) { + continue; + } + + if (insn->IsCall()) { + if (CallInsnClearDesignateStackRef(*insn, offset)) { + defInsnSet.insert(insn); + } + continue; + } + + if (!(insn->IsStore() || insn->IsPseudoInstruction())) { + continue; + } + + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + RegOperand *index = memOpnd.GetIndexRegister(); + + if (base == nullptr || !IsFrameReg(*base) || index != nullptr) { + break; + } + + ASSERT(memOpnd.GetOffsetImmediate() != nullptr, "offset must be a immediate value"); + int64 memOffset = memOpnd.GetOffsetImmediate()->GetOffsetValue(); + if (offset == memOffset) { + defInsnSet.insert(insn); + break; + } + if (insn->IsStorePair() && offset == memOffset + GetEachMemSizeOfPair(insn->GetMachineOpcode())) { + defInsnSet.insert(insn); + break; + } + } + } + } +} + +/* + * find defininition for register Iteratively. 
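+ * The search recurses backward through normal and EH predecessors: a predecessor that generates the register contributes its defining insns, otherwise the walk continues through blocks whose regIn contains the register.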
+ * input:
+ *   startBB: find definition starting from startBB
+ *   regNO: the number of the register to be found
+ *   visitedBB: records the visited BBs
+ *   defInsnSet: insns defining the register are saved in this set
+ */
+void AArch64ReachingDefinition::DFSFindDefForRegOpnd(const BB &startBB, uint32 regNO,
+                                                     std::vector<VisitStatus> &visitedBB,
+                                                     InsnSet &defInsnSet) const {
+  std::vector<Insn*> defInsnVec;
+  for (auto predBB : startBB.GetPreds()) {
+    if (visitedBB[predBB->GetId()] != kNotVisited) {
+      continue;
+    }
+    visitedBB[predBB->GetId()] = kNormalVisited;
+    if (regGen[predBB->GetId()]->TestBit(regNO)) {
+      defInsnVec.clear();
+      defInsnVec = FindRegDefBetweenInsn(regNO, predBB->GetFirstInsn(), predBB->GetLastInsn());
+      ASSERT(!defInsnVec.empty(), "opnd must be defined in this bb");
+      defInsnSet.insert(defInsnVec.begin(), defInsnVec.end());
+    } else if (regIn[predBB->GetId()]->TestBit(regNO)) {
+      DFSFindDefForRegOpnd(*predBB, regNO, visitedBB, defInsnSet);
+    }
+  }
+
+  for (auto predEhBB : startBB.GetEhPreds()) {
+    if (visitedBB[predEhBB->GetId()] == kEHVisited) {
+      continue;
+    }
+    visitedBB[predEhBB->GetId()] = kEHVisited;
+    if (regGen[predEhBB->GetId()]->TestBit(regNO)) {
+      FindRegDefInBB(regNO, *predEhBB, defInsnSet);
+    }
+
+    if (regIn[predEhBB->GetId()]->TestBit(regNO)) {
+      DFSFindDefForRegOpnd(*predEhBB, regNO, visitedBB, defInsnSet);
+    }
+  }
+}
+
+/*
+ * find definition for stack memory iteratively.
+ * input:
+ *   startBB: find definition starting from startBB
+ *   offset: the offset of the memory to be found
+ *   visitedBB: records the visited BBs
+ *   defInsnSet: insns defining the memory are saved in this set
+ */
+void AArch64ReachingDefinition::DFSFindDefForMemOpnd(const BB &startBB, uint32 offset,
+                                                     std::vector<VisitStatus> &visitedBB,
+                                                     InsnSet &defInsnSet) const {
+  std::vector<Insn*> defInsnVec;
+  for (auto predBB : startBB.GetPreds()) {
+    if (visitedBB[predBB->GetId()] != kNotVisited) {
+      continue;
+    }
+    visitedBB[predBB->GetId()] = kNormalVisited;
+    if (memGen[predBB->GetId()]->TestBit(offset / kMemZoomSize)) {
+      defInsnVec.clear();
+      defInsnVec = FindMemDefBetweenInsn(offset, predBB->GetFirstInsn(), predBB->GetLastInsn());
+      ASSERT(!defInsnVec.empty(), "opnd must be defined in this bb");
+      defInsnSet.insert(defInsnVec.begin(), defInsnVec.end());
+    } else if (memIn[predBB->GetId()]->TestBit(offset / kMemZoomSize)) {
+      DFSFindDefForMemOpnd(*predBB, offset, visitedBB, defInsnSet);
+    }
+  }
+
+  for (auto predEhBB : startBB.GetEhPreds()) {
+    if (visitedBB[predEhBB->GetId()] == kEHVisited) {
+      continue;
+    }
+    visitedBB[predEhBB->GetId()] = kEHVisited;
+    if (memGen[predEhBB->GetId()]->TestBit(offset / kMemZoomSize)) {
+      FindMemDefInBB(offset, *predEhBB, defInsnSet);
+    }
+
+    if (memIn[predEhBB->GetId()]->TestBit(offset / kMemZoomSize)) {
+      DFSFindDefForMemOpnd(*predEhBB, offset, visitedBB, defInsnSet);
+    }
+  }
+}
+
+/*
+ * find definition for register.
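+ * If the use sits in a cleanup BB and the backward search over its predecessors finds nothing, every non-cleanup BB that generates the register is scanned as a fallback.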
+ * input: + * insn: the insn in which register is used + * indexOrRegNO: the index of register in insn or the No of register to be find + * isRegNO: if indexOrRegNO is index, this argument is false, else is true + * return: + * the set of definition insns for register + */ +InsnSet AArch64ReachingDefinition::FindDefForRegOpnd(Insn &insn, uint32 indexOrRegNO, bool isRegNO) const { + uint32 regNO = indexOrRegNO; + if (!isRegNO) { + Operand &opnd = insn.GetOperand(indexOrRegNO); + auto ®Opnd = static_cast(opnd); + regNO = regOpnd.GetRegisterNumber(); + } + + std::vector defInsnVec; + if (regGen[insn.GetBB()->GetId()]->TestBit(regNO)) { + defInsnVec = FindRegDefBetweenInsn(regNO, insn.GetBB()->GetFirstInsn(), insn.GetPrev()); + } + InsnSet defInsnSet; + if (!defInsnVec.empty()) { + defInsnSet.insert(defInsnVec.begin(), defInsnVec.end()); + return defInsnSet; + } + std::vector visitedBB(kMaxBBNum, kNotVisited); + if (insn.GetBB()->IsCleanup()) { + DFSFindDefForRegOpnd(*insn.GetBB(), regNO, visitedBB, defInsnSet); + if (defInsnSet.empty()) { + FOR_ALL_BB(bb, cgFunc) { + if (bb->IsCleanup()) { + continue; + } + if (regGen[bb->GetId()]->TestBit(regNO)) { + FindRegDefInBB(regNO, *bb, defInsnSet); + } + } + } + } else { + DFSFindDefForRegOpnd(*insn.GetBB(), regNO, visitedBB, defInsnSet); + } + return defInsnSet; +} + +/* + * find insn using register between startInsn and endInsn. + * startInsn and endInsn must be in same BB and startInsn and endInsn are included + */ +bool AArch64ReachingDefinition::FindRegUseBetweenInsn(uint32 regNO, Insn *startInsn, Insn *endInsn, + InsnSet ®UseInsnSet) const { + bool findFinish = false; + if (startInsn == nullptr || endInsn == nullptr) { + return findFinish; + } + + ASSERT(startInsn->GetBB() == endInsn->GetBB(), "two insns must be in a same BB"); + ASSERT(endInsn->GetId() >= startInsn->GetId(), "two insns must be in a same BB"); + + for (Insn *insn = startInsn; insn != nullptr && insn != endInsn->GetNext(); insn = insn->GetNext()) { + if (!insn->IsMachineInstruction()) { + continue; + } + /* if insn is call and regNO is caller-saved register, then regNO will not be used later */ + if (insn->IsCall() && IsCallerSavedReg(regNO)) { + findFinish = true; + } + + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + if (opnd.IsList()) { + auto &listOpnd = static_cast(opnd); + for (auto listElem : listOpnd.GetOperands()) { + RegOperand *regOpnd = static_cast(listElem); + ASSERT(regOpnd != nullptr, "parameter operand must be RegOperand"); + if (regNO == regOpnd->GetRegisterNumber()) { + regUseInsnSet.insert(insn); + } + } + continue; + } else if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *baseOpnd = memOpnd.GetBaseRegister(); + if (baseOpnd != nullptr && + (memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi) && + (memOpnd.IsPostIndexed() || memOpnd.IsPreIndexed()) && + baseOpnd->GetRegisterNumber() == regNO) { + findFinish = true; + } + } + + AArch64OpndProp *regProp = static_cast(md->operand[i]); + if (regProp->IsDef() && + (opnd.IsConditionCode() || opnd.IsRegister()) && + (static_cast(opnd).GetRegisterNumber() == regNO)) { + findFinish = true; + } + + if (!regProp->IsUse() && !opnd.IsMemoryAccessOperand()) { + continue; + } + + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + RegOperand *index = 
memOpnd.GetIndexRegister(); + if ((base != nullptr && base->GetRegisterNumber() == regNO) || + (index != nullptr && index->GetRegisterNumber() == regNO)) { + regUseInsnSet.insert(insn); + } + } else if (opnd.IsConditionCode()) { + Operand &rflagOpnd = cgFunc->GetOrCreateRflag(); + RegOperand &rflagReg = static_cast(rflagOpnd); + if (rflagReg.GetRegisterNumber() == regNO) { + regUseInsnSet.insert(insn); + } + } else if (opnd.IsRegister() && (static_cast(opnd).GetRegisterNumber() == regNO)) { + regUseInsnSet.insert(insn); + } + } + + if (findFinish) { + break; + } + } + return findFinish; +} + +/* + * find insn using stack memory operand between startInsn and endInsn. + * startInsn and endInsn must be in same BB and startInsn and endInsn are included + */ +bool AArch64ReachingDefinition::FindMemUseBetweenInsn(uint32 offset, Insn *startInsn, const Insn *endInsn, + InsnSet &memUseInsnSet) const { + bool findFinish = false; + if (startInsn == nullptr || endInsn == nullptr) { + return findFinish; + } + + ASSERT(startInsn->GetBB() == endInsn->GetBB(), "two insns must be in a same BB"); + ASSERT(endInsn->GetId() >= startInsn->GetId(), "end ID must be greater than or equal to start ID"); + + for (Insn *insn = startInsn; insn != nullptr && insn != endInsn->GetNext(); insn = insn->GetNext()) { + if (!insn->IsMachineInstruction()) { + continue; + } + + if (insn->IsCall()) { + if (CallInsnClearDesignateStackRef(*insn, offset)) { + return true; + } + continue; + } + + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + if (!opnd.IsMemoryAccessOperand()) { + continue; + } + + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + if (base == nullptr || !IsFrameReg(*base)) { + continue; + } + + ASSERT(memOpnd.GetIndexRegister() == nullptr, "offset must not be Register for frame MemOperand"); + ASSERT(memOpnd.GetOffsetImmediate() != nullptr, "offset must be a immediate value"); + int64 memOffset = memOpnd.GetOffsetImmediate()->GetValue(); + + if (insn->IsStore() || insn->IsPseudoInstruction()) { + if (memOffset == offset) { + findFinish = true; + continue; + } + if (insn->IsStorePair() && offset == memOffset + GetEachMemSizeOfPair(insn->GetMachineOpcode())) { + findFinish = true; + continue; + } + } + + AArch64OpndProp *regProp = static_cast(md->operand[i]); + bool isUse = regProp->IsUse(); + if (!isUse) { + continue; + } + + if (offset == memOffset) { + memUseInsnSet.insert(insn); + } else if (insn->IsLoadPair() && offset == memOffset + GetEachMemSizeOfPair(insn->GetMachineOpcode())) { + memUseInsnSet.insert(insn); + } + } + + if (findFinish) { + break; + } + } + return findFinish; +} + +/* find all definition for stack memory operand insn.opnd[index] */ +InsnSet AArch64ReachingDefinition::FindDefForMemOpnd(Insn &insn, uint32 indexOrOffset, bool isOffset) const { + InsnSet defInsnSet; + int64 memOffSet = 0; + if (!isOffset) { + Operand &opnd = insn.GetOperand(indexOrOffset); + ASSERT(opnd.IsMemoryAccessOperand(), "opnd must be MemOperand"); + + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + RegOperand *indexReg = memOpnd.GetIndexRegister(); + + if (base == nullptr || !IsFrameReg(*base) || indexReg) { + return defInsnSet; + } + ASSERT(memOpnd.GetOffsetImmediate() != nullptr, "offset must be a immediate value"); + memOffSet = memOpnd.GetOffsetImmediate()->GetOffsetValue(); + } else { + 
memOffSet = indexOrOffset; + } + std::vector defInsnVec; + if (memGen[insn.GetBB()->GetId()]->TestBit(memOffSet / kMemZoomSize)) { + defInsnVec = FindMemDefBetweenInsn(memOffSet, insn.GetBB()->GetFirstInsn(), insn.GetPrev()); + } + + if (!defInsnVec.empty()) { + defInsnSet.insert(defInsnVec.begin(), defInsnVec.end()); + return defInsnSet; + } + std::vector visitedBB(kMaxBBNum, kNotVisited); + if (insn.GetBB()->IsCleanup()) { + DFSFindDefForMemOpnd(*insn.GetBB(), memOffSet, visitedBB, defInsnSet); + if (defInsnSet.empty()) { + FOR_ALL_BB(bb, cgFunc) { + if (bb->IsCleanup()) { + continue; + } + + if (memGen[bb->GetId()]->TestBit(memOffSet / kMemZoomSize)) { + FindMemDefInBB(memOffSet, *bb, defInsnSet); + } + } + } + } else { + DFSFindDefForMemOpnd(*insn.GetBB(), memOffSet, visitedBB, defInsnSet); + } + + return defInsnSet; +} + +/* + * find all insn using stack memory operand insn.opnd[index] + * secondMem is used to represent the second stack memory opernad in store pair insn + */ +InsnSet AArch64ReachingDefinition::FindUseForMemOpnd(Insn &insn, uint8 index, bool secondMem) const { + Operand &opnd = insn.GetOperand(index); + ASSERT(opnd.IsMemoryAccessOperand(), "opnd must be MemOperand"); + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + + InsnSet useInsnSet; + if (base == nullptr || !IsFrameReg(*base)) { + return useInsnSet; + } + + ASSERT(memOpnd.GetIndexRegister() == nullptr, "IndexRegister no nullptr"); + ASSERT(memOpnd.GetOffsetImmediate() != nullptr, "offset must be a immediate value"); + int64 memOffSet = memOpnd.GetOffsetImmediate()->GetOffsetValue(); + if (secondMem) { + ASSERT(insn.IsStorePair(), "second MemOperand can only be defined in stp insn"); + memOffSet += GetEachMemSizeOfPair(insn.GetMachineOpcode()); + } + /* memOperand may be redefined in current BB */ + bool findFinish = FindMemUseBetweenInsn(memOffSet, insn.GetNext(), insn.GetBB()->GetLastInsn(), useInsnSet); + std::vector visitedBB(kMaxBBNum, false); + if (findFinish || !memOut[insn.GetBB()->GetId()]->TestBit(memOffSet / kMemZoomSize)) { + if (insn.GetBB()->GetEhSuccs().size() != 0) { + DFSFindUseForMemOpnd(*insn.GetBB(), memOffSet, visitedBB, useInsnSet, true); + } + } else { + DFSFindUseForMemOpnd(*insn.GetBB(), memOffSet, visitedBB, useInsnSet, false); + } + if (!insn.GetBB()->IsCleanup() && firstCleanUpBB) { + if (memUse[firstCleanUpBB->GetId()]->TestBit(memOffSet / kMemZoomSize)) { + findFinish = FindMemUseBetweenInsn(memOffSet, firstCleanUpBB->GetFirstInsn(), + firstCleanUpBB->GetLastInsn(), useInsnSet); + if (findFinish || !memOut[firstCleanUpBB->GetId()]->TestBit(memOffSet / kMemZoomSize)) { + return useInsnSet; + } + } + DFSFindUseForMemOpnd(*firstCleanUpBB, memOffSet, visitedBB, useInsnSet, false); + } + return useInsnSet; +} + +/* + * initialize bb.gen and bb.use + * if it is not computed in first time, bb.gen and bb.use must be cleared firstly + */ +void AArch64ReachingDefinition::InitGenUse(BB &bb, bool firstTime) { + if (!firstTime && (mode & kRDRegAnalysis)) { + regGen[bb.GetId()]->ResetAllBit(); + regUse[bb.GetId()]->ResetAllBit(); + } + if (!firstTime && (mode & kRDMemAnalysis)) { + memGen[bb.GetId()]->ResetAllBit(); + memUse[bb.GetId()]->ResetAllBit(); + } + + if (bb.IsEmpty()) { + return; + } + + FOR_BB_INSNS(insn, (&bb)) { + if (!insn->IsMachineInstruction()) { + continue; + } + if (insn->IsCall()) { + GenAllCallerSavedRegs(bb); + InitMemInfoForClearStackCall(*insn); + } + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 
opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + if (opnd.IsList() && (mode & kRDRegAnalysis)) { + ASSERT(regProp->IsUse(), "ListOperand is used in insn"); + InitInfoForListOpnd(bb, opnd); + } else if (opnd.IsMemoryAccessOperand()) { + InitInfoForMemOperand(*insn, opnd, regProp->IsDef()); + } else if (opnd.IsConditionCode() && (mode & kRDRegAnalysis)) { + ASSERT(regProp->IsUse(), "condition code is used in insn"); + InitInfoForConditionCode(bb); + } else if (opnd.IsRegister() && (mode & kRDRegAnalysis)) { + InitInfoForRegOpnd(bb, opnd, regProp->IsDef()); + } + } + } +} + +void AArch64ReachingDefinition::InitMemInfoForClearStackCall(Insn &callInsn) { + if (!(mode & kRDMemAnalysis) || !callInsn.IsClearDesignateStackCall()) { + return; + } + int64 firstOffset = callInsn.GetClearStackOffset(kFirstClearMemIndex); + constexpr int64 defaultValOfClearMemOffset = -1; + if (firstOffset != defaultValOfClearMemOffset) { + memGen[callInsn.GetBB()->GetId()]->SetBit(firstOffset / kMemZoomSize); + } + int64 secondOffset = callInsn.GetClearStackOffset(kSecondClearMemIndex); + if (secondOffset != defaultValOfClearMemOffset) { + memGen[callInsn.GetBB()->GetId()]->SetBit(secondOffset / kMemZoomSize); + } +} + +void AArch64ReachingDefinition::InitInfoForMemOperand(Insn &insn, Operand &opnd, bool isDef) { + ASSERT(opnd.IsMemoryAccessOperand(), "opnd must be MemOperand"); + AArch64MemOperand &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + RegOperand *index = memOpnd.GetIndexRegister(); + + if (base == nullptr) { + return; + } + if ((mode & kRDMemAnalysis) && IsFrameReg(*base)) { + CHECK_FATAL(index == nullptr, "Existing [x29 + index] Memory Address"); + ASSERT(memOpnd.GetOffsetImmediate(), "offset must be a immediate value"); + int32 offsetVal = memOpnd.GetOffsetImmediate()->GetOffsetValue(); + if ((offsetVal % kMemZoomSize) != 0) { + SetAnalysisMode(kRDRegAnalysis); + } + + if (!isDef) { + memUse[insn.GetBB()->GetId()]->SetBit(offsetVal / kMemZoomSize); + if (insn.IsLoadPair()) { + int64 nextMemOffset = offsetVal + GetEachMemSizeOfPair(insn.GetMachineOpcode()); + memUse[insn.GetBB()->GetId()]->SetBit(nextMemOffset / kMemZoomSize); + } + } else if (isDef) { + memGen[insn.GetBB()->GetId()]->SetBit(offsetVal / kMemZoomSize); + if (insn.IsStorePair()) { + int64 nextMemOffset = offsetVal + GetEachMemSizeOfPair(insn.GetMachineOpcode()); + memGen[insn.GetBB()->GetId()]->SetBit(nextMemOffset / kMemZoomSize); + } + } + } + + if (mode & kRDRegAnalysis) { + regUse[insn.GetBB()->GetId()]->SetBit(base->GetRegisterNumber()); + if (index != nullptr) { + regUse[insn.GetBB()->GetId()]->SetBit(index->GetRegisterNumber()); + } + if (memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi && + (memOpnd.IsPostIndexed() || memOpnd.IsPreIndexed())) { + /* Base operand has changed. 
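+ * pre/post-indexed addressing writes the updated address back to the base register, so it is recorded as a definition as well.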
*/ + regGen[insn.GetBB()->GetId()]->SetBit(base->GetRegisterNumber()); + } + } +} + +void AArch64ReachingDefinition::InitInfoForListOpnd(const BB &bb, Operand &opnd) { + ListOperand *listOpnd = static_cast(&opnd); + for (auto listElem : listOpnd->GetOperands()) { + RegOperand *regOpnd = static_cast(listElem); + ASSERT(regOpnd != nullptr, "used Operand in call insn must be Register"); + regUse[bb.GetId()]->SetBit(regOpnd->GetRegisterNumber()); + } +} + +void AArch64ReachingDefinition::InitInfoForConditionCode(const BB &bb) { + Operand &rflagOpnd = cgFunc->GetOrCreateRflag(); + RegOperand &rflagReg = static_cast(rflagOpnd); + regUse[bb.GetId()]->SetBit(rflagReg.GetRegisterNumber()); +} + +void AArch64ReachingDefinition::InitInfoForRegOpnd(const BB &bb, Operand &opnd, bool isDef) { + RegOperand *regOpnd = static_cast(&opnd); + if (!isDef) { + regUse[bb.GetId()]->SetBit(regOpnd->GetRegisterNumber()); + } else { + regGen[bb.GetId()]->SetBit(regOpnd->GetRegisterNumber()); + } +} + +int32 AArch64ReachingDefinition::GetStackSize() const { + const int sizeofFplr = kDivide2 * kIntregBytelen; + return static_cast(cgFunc->GetMemlayout())->RealStackFrameSize() + sizeofFplr; +} + +bool AArch64ReachingDefinition::IsCallerSavedReg(uint32 regNO) const { + return (R0 <= regNO && regNO <= R18) || (V0 <= regNO && regNO <= V7) || (V16 <= regNO && regNO <= V31); +} + +int64 AArch64ReachingDefinition::GetEachMemSizeOfPair(MOperator opCode) const { + switch (opCode) { + case MOP_wstp: + case MOP_sstp: + case MOP_wstlxp: + case MOP_wldp: + case MOP_xldpsw: + case MOP_sldp: + case MOP_wldaxp: + return kWordByteNum; + case MOP_xstp: + case MOP_dstp: + case MOP_xstlxp: + case MOP_xldp: + case MOP_dldp: + case MOP_xldaxp: + return kDoubleWordByteNum; + default: + return 0; + } +} +} /* namespace maplebe */ diff --git a/src/maple_be/src/cg/aarch64/aarch64_schedule.cpp b/src/maple_be/src/cg/aarch64/aarch64_schedule.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d4050bca6b6ff7e75bb06171012624f018f2c568 --- /dev/null +++ b/src/maple_be/src/cg/aarch64/aarch64_schedule.cpp @@ -0,0 +1,1198 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#include "aarch64_schedule.h" +#include +#include "aarch64_cg.h" +#include "aarch64_operand.h" +#include "aarch64_dependence.h" +#include "pressure.h" + +/* + * This phase is Instruction Scheduling. + * There is a local list scheduling, it is scheduling in basic block. + * The entry is AArch64Schedule::ListScheduling, will traversal all basic block, + * for a basic block: + * 1. build a dependence graph; + * 2. combine clinit pairs and str&ldr pairs; + * 3. reorder instructions. + */ +namespace maplebe { +namespace { +constexpr uint32 kClinitAdvanceCycle = 10; +constexpr uint32 kAdrpLdrAdvanceCycle = 2; +constexpr uint32 kClinitTailAdvanceCycle = 4; +} + +uint32 AArch64Schedule::maxUnitIndex = 0; +/* Init schedule's data struction. 
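+ * clears the ready list and unit occupancy, and resets every node's valid predecessor/successor counts before a new scheduling round.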
*/ +void AArch64Schedule::Init() { + readyList.clear(); + nodeSize = nodes.size(); + lastSeparatorIndex = 0; + mad->ReleaseAllUnits(); + DepNode *node = nodes[0]; + + ASSERT(node->GetType() == kNodeTypeSeparator, "CG internal error, the first node should be a separator node."); + + if (CGOptions::IsDruteForceSched() || CGOptions::IsSimulateSched()) { + for (auto nodeTemp : nodes) { + nodeTemp->SetVisit(0); + nodeTemp->SetState(kNormal); + nodeTemp->SetSchedCycle(0); + nodeTemp->SetEStart(0); + nodeTemp->SetLStart(0); + } + } + + readyList.push_back(node); + node->SetState(kReady); + + /* Init validPredsSize and validSuccsSize. */ + for (auto nodeTemp : nodes) { + nodeTemp->SetValidPredsSize(nodeTemp->GetPreds().size()); + nodeTemp->SetValidSuccsSize(nodeTemp->GetSuccs().size()); + } +} + +/* + * A insn which can be combine should meet this conditions: + * 1. it is str/ldr insn; + * 2. address mode is kAddrModeBOi, [baseReg, offset]; + * 3. the register operand size equal memory operand size; + * 4. if define USE_32BIT_REF, register operand size should be 4 byte; + * 5. for stp/ldp, the imm should be within -512 and 504(64bit), or -256 and 252(32bit); + * 6. pair instr for 8/4 byte registers must have multiple of 8/4 for imm. + * If insn can be combine, return true. + */ +bool AArch64Schedule::CanCombine(const Insn &insn) const { + MOperator opCode = insn.GetMachineOpcode(); + if ((opCode != MOP_xldr) && (opCode != MOP_wldr) && (opCode != MOP_dldr) && (opCode != MOP_sldr) && + (opCode != MOP_xstr) && (opCode != MOP_wstr) && (opCode != MOP_dstr) && (opCode != MOP_sstr)) { + return false; + } + + ASSERT(insn.GetOperand(1).IsMemoryAccessOperand(), "expects mem operands"); + auto &memOpnd = static_cast(insn.GetOperand(1)); + AArch64MemOperand::AArch64AddressingMode addrMode = memOpnd.GetAddrMode(); + if ((addrMode != AArch64MemOperand::kAddrModeBOi) || !memOpnd.IsIntactIndexed()) { + return false; + } + + auto ®Opnd = static_cast(insn.GetOperand(0)); + if (regOpnd.GetSize() != memOpnd.GetSize()) { + return false; + } + + uint32 size = regOpnd.GetSize() >> kLog2BitsPerByte; +#ifdef USE_32BIT_REF + if (insn.IsAccessRefField() && (size > (kIntregBytelen >> 1))) { + return false; + } +#endif /* USE_32BIT_REF */ + + AArch64OfstOperand *offset = memOpnd.GetOffsetImmediate(); + if (offset == nullptr) { + return false; + } + int32 offsetValue = offset->GetOffsetValue(); + if (size == kIntregBytelen) { /* 64 bit */ + if ((offsetValue <= kStpLdpImm64LowerBound) || (offsetValue >= kStpLdpImm64UpperBound)) { + return false; + } + } else if (size == (kIntregBytelen >> 1)) { /* 32 bit */ + if ((offsetValue <= kStpLdpImm32LowerBound) || (offsetValue >= kStpLdpImm32UpperBound)) { + return false; + } + } + + /* pair instr for 8/4 byte registers must have multiple of 8/4 for imm */ + if ((static_cast(offsetValue) % size) != 0) { + return false; + } + return true; +} + +/* After building dependence graph, combine str&ldr pairs. 
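+ * for example (illustrative operands): two 64-bit loads ldr x1, [x29,#16] and ldr x2, [x29,#24] that pass CanCombine may be merged into a single load pair.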
*/ +void AArch64Schedule::MemoryAccessPairOpt() { + Init(); + std::vector memList; + + while ((!readyList.empty()) || !memList.empty()) { + DepNode *readNode = nullptr; + if (!readyList.empty()) { + readNode = readyList[0]; + readyList.erase(readyList.begin()); + } else { + if (memList[0]->GetType() != kNodeTypeEmpty) { + FindAndCombineMemoryAccessPair(memList); + } + readNode = memList[0]; + memList.erase(memList.begin()); + } + + /* schedule readNode */ + CHECK_FATAL(readNode != nullptr, "readNode is null in MemoryAccessPairOpt"); + readNode->SetState(kScheduled); + + /* add readNode's succs to readyList or memList. */ + for (auto succLink : readNode->GetSuccs()) { + DepNode &succNode = succLink->GetTo(); + succNode.DescreaseValidPredsSize(); + if (succNode.GetValidPredsSize() == 0) { + ASSERT(succNode.GetState() == kNormal, "schedule state should be kNormal"); + succNode.SetState(kReady); + ASSERT(succNode.GetInsn() != nullptr, "insn can't be nullptr!"); + if (CanCombine(*succNode.GetInsn())) { + memList.push_back(&succNode); + } else { + readyList.push_back(&succNode); + } + } + } + } + + for (auto node : nodes) { + node->SetVisit(0); + node->SetState(kNormal); + } +} + +/* Find and combine correct MemoryAccessPair for memList[0]. */ +void AArch64Schedule::FindAndCombineMemoryAccessPair(const std::vector &memList) { + ASSERT(!memList.empty(), "memList should not be empty"); + CHECK_FATAL(memList[0]->GetInsn() != nullptr, "memList[0]'s insn should not be nullptr"); + AArch64MemOperand *currMemOpnd = static_cast(memList[0]->GetInsn()->GetMemOpnd()); + ASSERT(currMemOpnd != nullptr, "opnd should not be nullptr"); + ASSERT(currMemOpnd->IsMemoryAccessOperand(), "opnd should be memOpnd"); + int32 currOffsetVal = currMemOpnd->GetOffsetImmediate()->GetOffsetValue(); + MOperator currMop = memList[0]->GetInsn()->GetMachineOpcode(); + /* find a depNode to combine with memList[0], and break; */ + for (auto it = std::next(memList.begin(), 1); it != memList.end(); ++it) { + ASSERT((*it)->GetInsn() != nullptr, "null ptr check"); + + if (currMop == (*it)->GetInsn()->GetMachineOpcode()) { + AArch64MemOperand *nextMemOpnd = static_cast((*it)->GetInsn()->GetMemOpnd()); + CHECK_FATAL(nextMemOpnd != nullptr, "opnd should not be nullptr"); + CHECK_FATAL(nextMemOpnd->IsMemoryAccessOperand(), "opnd should be MemOperand"); + int32 nextOffsetVal = nextMemOpnd->GetOffsetImmediate()->GetOffsetValue(); + uint32 size = currMemOpnd->GetSize() >> kLog2BitsPerByte; + if ((nextMemOpnd->GetBaseRegister() == currMemOpnd->GetBaseRegister()) && + (nextMemOpnd->GetSize() == currMemOpnd->GetSize()) && + (static_cast(abs(nextOffsetVal - currOffsetVal)) == size)) { + /* + * In ARM Architecture Reference Manual ARMv8, for ARMv8-A architecture profile + * LDP on page K1-6125 declare that ldp can't use same reg + */ + if (((currMop == MOP_xldr) || (currMop == MOP_sldr) || (currMop == MOP_dldr) || (currMop == MOP_wldr)) && + &(memList[0]->GetInsn()->GetOperand(0)) == &((*it)->GetInsn()->GetOperand(0))) { + continue; + } + + if (LIST_SCHED_DUMP_REF) { + LogInfo::MapleLogger() << "Combine inse: " << "\n"; + memList[0]->GetInsn()->Dump(); + (*it)->GetInsn()->Dump(); + } + depAnalysis->CombineMemoryAccessPair(*memList[0], **it, nextOffsetVal > currOffsetVal); + if (LIST_SCHED_DUMP_REF) { + LogInfo::MapleLogger() << "To: " << "\n"; + memList[0]->GetInsn()->Dump(); + } + break; + } + } + } +} + +/* combine clinit pairs. 
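+ * an adrp_ldr node followed by a clinit_tail node (possibly with a separator node in between) is fused by the dependence analysis.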
*/ +void AArch64Schedule::ClinitPairOpt() { + for (auto it = nodes.begin(); it != nodes.end(); ++it) { + auto nextIt = std::next(it, 1); + if (nextIt == nodes.end()) { + return; + } + + if ((*it)->GetInsn()->GetMachineOpcode() == MOP_adrp_ldr) { + if ((*nextIt)->GetInsn()->GetMachineOpcode() == MOP_clinit_tail) { + depAnalysis->CombineClinit(**it, **(nextIt), false); + } else if ((*nextIt)->GetType() == kNodeTypeSeparator) { + nextIt = std::next(nextIt, 1); + if (nextIt == nodes.end()) { + return; + } + if ((*nextIt)->GetInsn()->GetMachineOpcode() == MOP_clinit_tail) { + /* Do something. */ + depAnalysis->CombineClinit(**it, **(nextIt), true); + } + } + } + } +} + +/* Return the next node's index who is kNodeTypeSeparator. */ +uint32 AArch64Schedule::GetNextSepIndex() const { + return ((lastSeparatorIndex + kMaxDependenceNum) < nodeSize) ? (lastSeparatorIndex + kMaxDependenceNum) + : (nodes.size() - 1); +} + +/* Do register pressure schduling. */ +void AArch64Schedule::RegPressureScheduling(BB &bb, MapleVector &nodes) { + RegPressureSchedule *regSchedule = memPool.New(cgFunc, alloc); + /* + * Get physical register amount currently + * undef, Int Reg, Floag Reg, Flag Reg + */ + const std::vector kRegNumVec = { 0, V0, kMaxRegNum - V0 + 1, 1 }; + regSchedule->InitBBInfo(bb, memPool, nodes); + regSchedule->BuildPhyRegInfo(kRegNumVec); + regSchedule->DoScheduling(nodes); +} + +/* + * Compute earliest start of the node, + * return value : the maximum estart. + */ +uint32 AArch64Schedule::ComputeEstart(uint32 cycle) { + std::vector readyNodes; + uint32 maxIndex = GetNextSepIndex(); + + if (CGOptions::IsDebugSched()) { + /* Check validPredsSize. */ + for (uint32 i = lastSeparatorIndex; i <= maxIndex; ++i) { + DepNode *node = nodes[i]; + int32 schedNum = 0; + for (const auto *predLink : node->GetPreds()) { + if (predLink->GetFrom().GetState() == kScheduled) { + ++schedNum; + } + } + ASSERT((node->GetPreds().size() - schedNum) == node->GetValidPredsSize(), "validPredsSize error."); + } + } + + ASSERT(nodes[maxIndex]->GetType() == kNodeTypeSeparator, + "CG internal error, nodes[maxIndex] should be a separator node."); + + readyNodes.insert(readyNodes.begin(), readyList.begin(), readyList.end()); + + uint32 maxEstart = cycle; + for (uint32 i = lastSeparatorIndex; i <= maxIndex; ++i) { + DepNode *node = nodes[i]; + node->SetVisit(0); + } + + for (auto *node : readyNodes) { + ASSERT(node->GetState() == kReady, "CG internal error, all nodes in ready list should be ready."); + if (node->GetEStart() < cycle) { + node->SetEStart(cycle); + } + } + + while (!readyNodes.empty()) { + DepNode *node = readyNodes.front(); + readyNodes.erase(readyNodes.begin()); + + for (const auto *succLink : node->GetSuccs()) { + DepNode &succNode = succLink->GetTo(); + if (succNode.GetType() == kNodeTypeSeparator) { + continue; + } + + if (succNode.GetEStart() < (node->GetEStart() + succLink->GetLatency())) { + succNode.SetEStart(node->GetEStart() + succLink->GetLatency()); + } + maxEstart = (maxEstart < succNode.GetEStart() ? succNode.GetEStart() : maxEstart); + succNode.IncreaseVisit(); + if ((succNode.GetVisit() >= succNode.GetValidPredsSize()) && (succNode.GetType() != kNodeTypeSeparator)) { + readyNodes.push_back(&succNode); + } + ASSERT(succNode.GetVisit() <= succNode.GetValidPredsSize(), "CG internal error."); + } + } + + return maxEstart; +} + +/* Compute latest start of the node. 
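+ * lstart is propagated backward from the separator node: a predecessor's lstart is lowered to (successor lstart - edge latency) whenever that is smaller.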
*/ +void AArch64Schedule::ComputeLstart(uint32 maxEstart) { + /* std::vector is better than std::queue in run time */ + std::vector readyNodes; + uint32 maxIndex = GetNextSepIndex(); + + ASSERT(nodes[maxIndex]->GetType() == kNodeTypeSeparator, + "CG internal error, nodes[maxIndex] should be a separator node."); + + for (uint32 i = lastSeparatorIndex; i <= maxIndex; ++i) { + DepNode *node = nodes[i]; + node->SetLStart(maxEstart); + node->SetVisit(0); + } + + readyNodes.push_back(nodes[maxIndex]); + while (!readyNodes.empty()) { + DepNode *node = readyNodes.front(); + readyNodes.erase(readyNodes.begin()); + for (const auto *predLink : node->GetPreds()) { + DepNode &predNode = predLink->GetFrom(); + if (predNode.GetState() == kScheduled) { + continue; + } + + if (predNode.GetLStart() > (node->GetLStart() - predLink->GetLatency())) { + predNode.SetLStart(node->GetLStart() - predLink->GetLatency()); + } + predNode.IncreaseVisit(); + if ((predNode.GetVisit() >= predNode.GetValidSuccsSize()) && (predNode.GetType() != kNodeTypeSeparator)) { + readyNodes.push_back(&predNode); + } + + ASSERT(predNode.GetVisit() <= predNode.GetValidSuccsSize(), "CG internal error."); + } + } +} + +/* Compute earliest start and latest start of the node that is in readyList and not be scheduled. */ +void AArch64Schedule::UpdateELStartsOnCycle(uint32 cycle) { + ComputeLstart(ComputeEstart(cycle)); +} + +/* + * If all unit of this node need when it be scheduling is free, this node can be scheduled, + * Return true. + */ +bool DepNode::CanBeScheduled() const { + for (uint32 i = 0; i < unitNum; ++i) { + Unit *unit = units[i]; + if (unit != nullptr) { + if (!unit->IsFree(i)) { + return false; + } + } + } + + return true; +} + +/* Mark those unit that this node need occupy unit when it is being scheduled. */ +void DepNode::OccupyUnits() { + for (uint32 i = 0; i < unitNum; ++i) { + Unit *unit = units[i]; + if (unit != nullptr) { + unit->Occupy(*insn, i); + } + } +} + +/* Get unit kind of this node's units[0]. */ +uint32 DepNode::GetUnitKind() const { + uint32 retValue = 0; + if ((units == nullptr) || (units[0] == nullptr)) { + return retValue; + } + + switch (units[0]->GetUnitId()) { + case kUnitIdSlotD: + retValue |= kUnitKindSlot0; + break; + case kUnitIdAgen: + case kUnitIdSlotSAgen: + retValue |= kUnitKindAgen; + break; + case kUnitIdSlotDAgen: + retValue |= kUnitKindAgen; + retValue |= kUnitKindSlot0; + break; + case kUnitIdHazard: + case kUnitIdSlotSHazard: + retValue |= kUnitKindHazard; + break; + case kUnitIdCrypto: + retValue |= kUnitKindCrypto; + break; + case kUnitIdMul: + case kUnitIdSlotSMul: + retValue |= kUnitKindMul; + break; + case kUnitIdDiv: + retValue |= kUnitKindDiv; + break; + case kUnitIdBranch: + case kUnitIdSlotSBranch: + retValue |= kUnitKindBranch; + break; + case kUnitIdStAgu: + retValue |= kUnitKindStAgu; + break; + case kUnitIdLdAgu: + retValue |= kUnitKindLdAgu; + break; + case kUnitIdFpAluS: + case kUnitIdFpAluD: + retValue |= kUnitKindFpAlu; + break; + case kUnitIdFpMulS: + case kUnitIdFpMulD: + retValue |= kUnitKindFpMul; + break; + case kUnitIdFpDivS: + case kUnitIdFpDivD: + retValue |= kUnitKindFpDiv; + break; + case kUnitIdSlot0LdAgu: + retValue |= kUnitKindSlot0; + retValue |= kUnitKindLdAgu; + break; + case kUnitIdSlot0StAgu: + retValue |= kUnitKindSlot0; + retValue |= kUnitKindStAgu; + break; + default: + break; + } + + return retValue; +} + +/* Count unit kinds to an array. Each element of the array indicates the unit kind number of a node set. 
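+ * the unit-kind mask is scanned with __builtin_ffs, so array[i] counts the nodes whose mask has bit i set (1-based, as returned by ffs).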
*/ +void AArch64Schedule::CountUnitKind(const DepNode &depNode, uint32 array[], const uint32 arraySize) const { + (void)arraySize; + ASSERT(arraySize >= kUnitKindLast, "CG internal error. unit kind number is not correct."); + uint32 unitKind = depNode.GetUnitKind(); + int32 index = __builtin_ffs(unitKind); + while (index) { + ASSERT(index < kUnitKindLast, "CG internal error. index error."); + ++array[index]; + unitKind &= ~(1u << (index - 1u)); + index = __builtin_ffs(unitKind); + } +} + +/* Check if a node use a specific unit kind. */ +bool AArch64Schedule::IfUseUnitKind(const DepNode &depNode, uint32 index) { + uint32 unitKind = depNode.GetUnitKind(); + int32 idx = __builtin_ffs(unitKind); + while (idx) { + ASSERT(index < kUnitKindLast, "CG internal error. index error."); + if (idx == index) { + return true; + } + unitKind &= ~(1u << (idx - 1u)); + idx = __builtin_ffs(unitKind); + } + + return false; +} + +/* A sample schedule according dependence graph only, to verify correctness of dependence graph. */ +void AArch64Schedule::RandomTest() { + Init(); + nodes.clear(); + + while (!readyList.empty()) { + DepNode *currNode = readyList.back(); + currNode->SetState(kScheduled); + readyList.pop_back(); + nodes.push_back(currNode); + + for (auto succLink : currNode->GetSuccs()) { + DepNode &succNode = succLink->GetTo(); + bool ready = true; + for (auto predLink : succNode.GetPreds()) { + DepNode &predNode = predLink->GetFrom(); + if (predNode.GetState() != kScheduled) { + ready = false; + break; + } + } + + if (ready) { + ASSERT(succNode.GetState() == kNormal, "succNode must be kNormal"); + readyList.push_back(&succNode); + succNode.SetState(kReady); + } + } + } +} + +/* Remove target from readyList. */ +void AArch64Schedule::EraseNodeFromReadyList(const DepNode &target) { + EraseNodeFromNodeList(target, readyList); +} + +/* Remove target from nodeList. */ +void AArch64Schedule::EraseNodeFromNodeList(const DepNode &target, MapleVector &nodeList) { + for (auto it = nodeList.begin(); it != nodeList.end(); ++it) { + if ((*it) == &target) { + nodeList.erase(it); + return; + } + } + + ASSERT(false, "CG internal error, erase node fail."); +} + +/* Dump all node of availableReadyList schedule information in current cycle. */ +void AArch64Schedule::DumpDebugInfo(const ScheduleProcessInfo &scheduleInfo) { + LogInfo::MapleLogger() << "Current cycle[ " << scheduleInfo.GetCurrCycle() << " ], Available in readyList is : \n"; + for (auto node : scheduleInfo.GetAvailableReadyList()) { + LogInfo::MapleLogger() << "NodeIndex[ " << node->GetIndex() + << " ], Estart[ " << node->GetEStart() << " ], Lstart[ "; + LogInfo::MapleLogger() << node->GetLStart() << " ], slot[ "; + LogInfo::MapleLogger() << + (node->GetReservation() == nullptr ? "SlotNone" : node->GetReservation()->GetSlotName()) << " ], "; + LogInfo::MapleLogger() << "succNodeNum[ " << node->GetSuccs().size() << " ], "; + node->GetInsn()->Dump(); + LogInfo::MapleLogger() << '\n'; + } +} + +/* + * Select a node from availableReadyList according to some heuristic rules, then: + * 1. change targetNode's schedule information; + * 2. try to add successors of targetNode to readyList; + * 3. update unscheduled node set, when targetNode is last kNodeTypeSeparator; + * 4. update AdvanceCycle. 
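+ * The selection heuristics (see CompareDepNode): smaller lstart first, then use of the most demanded unit kind, then slot0, then more successors, then original insn id.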
+ */ +void AArch64Schedule::SelectNode(ScheduleProcessInfo &scheduleInfo) { + auto &availableReadyList = scheduleInfo.GetAvailableReadyList(); + auto it = availableReadyList.begin(); + DepNode *targetNode = *it; + if (availableReadyList.size() > 1) { + CalculateMaxUnitKindCount(scheduleInfo); + ++it; + for (; it != availableReadyList.end(); ++it) { + if (CompareDepNode(**it, *targetNode)) { + targetNode = *it; + } + } + } + scheduleInfo.PushElemIntoScheduledNodes(targetNode); + EraseNodeFromReadyList(*targetNode); + + if (CGOptions::IsDebugSched()) { + LogInfo::MapleLogger() << "TargetNode : "; + targetNode->GetInsn()->Dump(); + LogInfo::MapleLogger() << "\n"; + } + + /* Update readyList. */ + UpdateReadyList(*targetNode, readyList, true); + + if (targetNode->GetType() == kNodeTypeSeparator) { + /* If target node is separator node, update lastSeparatorIndex and calculate those depNodes's estart and lstart + * between current separator node and new Separator node. + */ + if (!scheduleInfo.IsFirstSeparator()) { + lastSeparatorIndex += kMaxDependenceNum; + UpdateELStartsOnCycle(scheduleInfo.GetCurrCycle()); + } else { + scheduleInfo.ResetIsFirstSeparator(); + } + } + + switch (targetNode->GetInsn()->GetLatencyType()) { + case kLtClinit: + scheduleInfo.SetAdvanceCycle(kClinitAdvanceCycle); + break; + case kLtAdrpLdr: + scheduleInfo.SetAdvanceCycle(kAdrpLdrAdvanceCycle); + break; + case kLtClinitTail: + scheduleInfo.SetAdvanceCycle(kClinitTailAdvanceCycle); + break; + default: + break; + } + + if ((scheduleInfo.GetAdvanceCycle() == 0) && mad->IsFullIssued()) { + scheduleInfo.SetAdvanceCycle(1); + } +} + +/* + * Advance mad's cycle until info's advanceCycle equal zero, + * and then clear info's availableReadyList. + */ +void AArch64Schedule::UpdateScheduleProcessInfo(ScheduleProcessInfo &info) { + while (info.GetAdvanceCycle() > 0) { + info.IncCurrCycle(); + mad->AdvanceCycle(); + info.DecAdvanceCycle(); + } + info.ClearAvailableReadyList(); +} + +/* + * Forward traversal readyList, if a node in readyList can be Schedule, add it to availableReadyList. + * Return true, if availableReadyList is not empty. + */ +bool AArch64Schedule::CheckSchedulable(ScheduleProcessInfo &info) const { + for (auto node : readyList) { + if (node->CanBeScheduled() && node->GetEStart() <= info.GetCurrCycle()) { + info.PushElemIntoAvailableReadyList(node); + } + } + + if (info.AvailableReadyListIsEmpty()) { + return false; + } + return true; +} + +/* After building dependence graph, schedule insns. */ +uint32 AArch64Schedule::DoSchedule() { + ScheduleProcessInfo scheduleInfo(nodeSize); + Init(); + UpdateELStartsOnCycle(scheduleInfo.GetCurrCycle()); + + while (!readyList.empty()) { + UpdateScheduleProcessInfo(scheduleInfo); + /* Check if schedulable */ + if (!CheckSchedulable(scheduleInfo)) { + /* Advance cycle. 
*/ + scheduleInfo.SetAdvanceCycle(1); + continue; + } + + if (scheduleInfo.GetLastUpdateCycle() < scheduleInfo.GetCurrCycle()) { + scheduleInfo.SetLastUpdateCycle(scheduleInfo.GetCurrCycle()); + } + + if (CGOptions::IsDebugSched()) { + DumpDebugInfo(scheduleInfo); + } + + /* Select a node to scheduling */ + SelectNode(scheduleInfo); + } + + ASSERT(scheduleInfo.SizeOfScheduledNodes() == nodes.size(), "CG internal error, Not all nodes scheduled."); + + nodes.clear(); + nodes.insert(nodes.begin(), scheduleInfo.GetScheduledNodes().begin(), scheduleInfo.GetScheduledNodes().end()); + /* the second to last node is the true last node, because the last is kNodeTypeSeparator node */ + ASSERT(nodes.size() - 2 >= 0, "size of nodes should be greater than or equal 2"); + return (nodes[nodes.size() - 2]->GetSchedCycle()); +} + +/* + * Comparing priorities of node1 and node2 according to some heuristic rules + * return true if node1's priority is higher + */ +bool AArch64Schedule::CompareDepNode(const DepNode &node1, const DepNode &node2) { + /* less LStart first */ + if (node1.GetLStart() != node2.GetLStart()) { + return node1.GetLStart() < node2.GetLStart(); + } + + /* max unit kind use */ + bool use1 = IfUseUnitKind(node1, maxUnitIndex); + bool use2 = IfUseUnitKind(node2, maxUnitIndex); + if (use1 != use2) { + return use1; + } + + /* slot0 first */ + SlotType slotType1 = node1.GetReservation()->GetSlot(); + SlotType slotType2 = node2.GetReservation()->GetSlot(); + if (slotType1 == kSlots) { + slotType1 = kSlot0; + } + if (slotType2 == kSlots) { + slotType2 = kSlot0; + } + if (slotType1 != slotType2) { + return slotType1 < slotType2; + } + + /* more succNodes fisrt */ + if (node1.GetSuccs().size() != node2.GetSuccs().size()) { + return node1.GetSuccs().size() > node2.GetSuccs().size(); + } + + /* default order */ + return node1.GetInsn()->GetId() < node2.GetInsn()->GetId(); +} + +/* + * Calculate number of every unit that used by avaliableReadyList's nodes and save the max in maxUnitIndex + */ +void AArch64Schedule::CalculateMaxUnitKindCount(ScheduleProcessInfo &scheduleInfo) { + uint32 unitKindCount[kUnitKindLast] = { 0 }; + for (auto node : scheduleInfo.GetAvailableReadyList()) { + CountUnitKind(*node, unitKindCount, kUnitKindLast); + } + + uint32 maxCount = 0; + maxUnitIndex = 0; + for (size_t i = 1; i < kUnitKindLast; ++i) { + if (maxCount < unitKindCount[i]) { + maxCount = unitKindCount[i]; + maxUnitIndex = i; + } + } +} + +/* + * A simulated schedule: + * scheduling instruction in original order to calculate original execute cycles. + */ +uint32 AArch64Schedule::SimulateOnly() { + uint32 currCycle = 0; + uint32 advanceCycle = 0; + Init(); + + for (uint32 i = 0; i < nodes.size();) { + while (advanceCycle > 0) { + ++currCycle; + mad->AdvanceCycle(); + --advanceCycle; + } + + DepNode *targetNode = nodes[i]; + if ((currCycle >= targetNode->GetEStart()) && targetNode->CanBeScheduled()) { + targetNode->SetSimulateCycle(currCycle); + targetNode->OccupyUnits(); + + /* Update estart. 
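+ * a successor cannot issue before the current cycle plus the latency of the dependence edge.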
*/ + for (auto succLink : targetNode->GetSuccs()) { + DepNode &succNode = succLink->GetTo(); + uint32 eStart = currCycle + succLink->GetLatency(); + if (succNode.GetEStart() < eStart) { + succNode.SetEStart(eStart); + } + } + + if (CGOptions::IsDebugSched()) { + LogInfo::MapleLogger() << "[Simulate] TargetNode : "; + targetNode->GetInsn()->Dump(); + LogInfo::MapleLogger() << "\n"; + } + + switch (targetNode->GetInsn()->GetLatencyType()) { + case kLtClinit: + advanceCycle = kClinitAdvanceCycle; + break; + case kLtAdrpLdr: + advanceCycle = kAdrpLdrAdvanceCycle; + break; + case kLtClinitTail: + advanceCycle = kClinitTailAdvanceCycle; + break; + default: + break; + } + + ++i; + } else { + advanceCycle = 1; + } + } + /* the second to last node is the true last node, because the last is kNodeTypeSeparator nod */ + ASSERT(nodes.size() - 2 >= 0, "size of nodes should be greater than or equal 2"); + return (nodes[nodes.size() - 2]->GetSimulateCycle()); +} + +/* Restore dependence graph to normal CGIR. */ +void AArch64Schedule::FinalizeScheduling(BB &bb, const DepAnalysis &depAnalysis) { + bb.ClearInsns(); + + const Insn *prevLocInsn = (bb.GetPrev() != nullptr ? bb.GetPrev()->GetLastLoc() : nullptr); + for (auto node : nodes) { + /* Append comments first. */ + for (auto comment : node->GetComments()) { + bb.AppendInsn(*comment); + } + /* Append insn. */ + if (!node->GetClinitInsns().empty()) { + for (auto clinit : node->GetClinitInsns()) { + bb.AppendInsn(*clinit); + } + } else if (node->GetType() == kNodeTypeNormal) { + bb.AppendInsn(*node->GetInsn()); + } + + /* Append cfi instructions. */ + for (auto cfi : node->GetCfiInsns()) { + bb.AppendInsn(*cfi); + } + } + bb.SetLastLoc(prevLocInsn); + + for (auto lastComment : depAnalysis.GetLastComments()) { + bb.AppendInsn(*lastComment); + } +} + +/* For every node of nodes, update it's bruteForceSchedCycle. */ +void AArch64Schedule::UpdateBruteForceSchedCycle() { + for (auto node : nodes) { + node->SetBruteForceSchedCycle(node->GetSchedCycle()); + } +} + +/* Recursively schedule all of the possible node. */ +void AArch64Schedule::IterateBruteForce(DepNode &targetNode, MapleVector &readyList, uint32 currCycle, + MapleVector &scheduledNodes, uint32 &maxCycleCount, + MapleVector &optimizedScheduledNodes) { + /* Save states. */ + constexpr int32 unitSize = 31; + ASSERT(unitSize == mad->GetAllUnitsSize(), "CG internal error."); + std::vector occupyTable; + occupyTable.resize(unitSize, 0); + mad->SaveStates(occupyTable, unitSize); + + /* Schedule targetNode first. */ + targetNode.SetState(kScheduled); + targetNode.SetSchedCycle(currCycle); + scheduledNodes.push_back(&targetNode); + + MapleVector tempList = readyList; + EraseNodeFromNodeList(targetNode, tempList); + targetNode.OccupyUnits(); + + /* Update readyList. */ + UpdateReadyList(targetNode, tempList, true); + + if (targetNode.GetType() == kNodeTypeSeparator) { + /* If target node is separator node, update lastSeparatorIndex. 
*/ + lastSeparatorIndex += kMaxDependenceNum; + } + + if (tempList.empty()) { + ASSERT(scheduledNodes.size() == nodes.size(), "CG internal error, Not all nodes scheduled."); + if (currCycle < maxCycleCount) { + maxCycleCount = currCycle; + UpdateBruteForceSchedCycle(); + optimizedScheduledNodes = scheduledNodes; + } + } else { + uint32 advanceCycle = 0; + switch (targetNode.GetInsn()->GetLatencyType()) { + case kLtClinit: + advanceCycle = kClinitAdvanceCycle; + break; + case kLtAdrpLdr: + advanceCycle = kAdrpLdrAdvanceCycle; + break; + case kLtClinitTail: + advanceCycle = kClinitTailAdvanceCycle; + break; + default: + break; + } + + do { + std::vector availableReadyList; + std::vector tempAvailableList; + while (advanceCycle > 0) { + ++currCycle; + mad->AdvanceCycle(); + --advanceCycle; + } + /* Check EStart. */ + for (auto node : tempList) { + if (node->GetEStart() <= currCycle) { + tempAvailableList.push_back(node); + } + } + + if (tempAvailableList.empty()) { + /* Advance cycle. */ + advanceCycle = 1; + continue; + } + + /* Check if schedulable */ + for (auto node : tempAvailableList) { + if (node->CanBeScheduled()) { + availableReadyList.push_back(node); + } + } + + if (availableReadyList.empty()) { + /* Advance cycle. */ + advanceCycle = 1; + continue; + } + + for (auto node : availableReadyList) { + IterateBruteForce(*node, tempList, currCycle, scheduledNodes, maxCycleCount, optimizedScheduledNodes); + } + + break; + } while (true); + } + + /* + * Recover states. + * Restore targetNode first. + */ + targetNode.SetState(kReady); + targetNode.SetSchedCycle(0); + scheduledNodes.pop_back(); + mad->RestoreStates(occupyTable, unitSize); + + /* Update readyList. */ + for (auto succLink : targetNode.GetSuccs()) { + DepNode &succNode = succLink->GetTo(); + succNode.IncreaseValidPredsSize(); + succNode.SetState(kNormal); + } + + if (targetNode.GetType() == kNodeTypeSeparator) { + /* If target node is separator node, update lastSeparatorIndex. */ + lastSeparatorIndex -= kMaxDependenceNum; + } +} + +/* + * Brute force schedule: + * Finding all possibile schedule list of current bb, and calculate every list's execute cycles, + * return the optimal schedule list and it's cycles. + */ +uint32 AArch64Schedule::DoBruteForceSchedule() { + MapleVector scheduledNodes(alloc.Adapter()); + MapleVector optimizedScheduledNodes(alloc.Adapter()); + + uint32 currCycle = 0; + uint32 maxCycleCount = 0xFFFFFFFF; + Init(); + + /* Schedule First separator. */ + DepNode *targetNode = readyList.front(); + targetNode->SetState(kScheduled); + targetNode->SetSchedCycle(currCycle); + scheduledNodes.push_back(targetNode); + readyList.clear(); + + /* Update readyList. */ + UpdateReadyList(*targetNode, readyList, false); + + ASSERT(targetNode->GetType() == kNodeTypeSeparator, "The first node should be separator node."); + ASSERT(!readyList.empty(), "readyList should not be empty."); + + for (auto targetNodeTemp : readyList) { + IterateBruteForce(*targetNodeTemp, readyList, currCycle, scheduledNodes, maxCycleCount, optimizedScheduledNodes); + } + + nodes = optimizedScheduledNodes; + return maxCycleCount; +} + +/* + * Update ready list after the targetNode has been scheduled. + * For every targetNode's successor, if it's all predecessors have been scheduled, + * add it to ready list and update it's information (like state, estart). 
+ */ +void AArch64Schedule::UpdateReadyList(DepNode &targetNode, MapleVector &readyList, bool updateEStart) { + for (auto succLink : targetNode.GetSuccs()) { + DepNode &succNode = succLink->GetTo(); + succNode.DescreaseValidPredsSize(); + if (succNode.GetValidPredsSize() == 0) { + readyList.push_back(&succNode); + succNode.SetState(kReady); + + /* Set eStart. */ + if (updateEStart) { + uint32 maxEstart = 0; + for (auto predLink : succNode.GetPreds()) { + DepNode &predNode = predLink->GetFrom(); + uint32 eStart = predNode.GetSchedCycle() + predLink->GetLatency(); + maxEstart = (maxEstart < eStart ? eStart : maxEstart); + } + succNode.SetEStart(maxEstart); + } + } + } +} + +/* For every node of nodes, dump it's Depdence information. */ +void AArch64Schedule::DumpDepGraph(const MapleVector &nodes) const { + for (auto node : nodes) { + depAnalysis->DumpDepNode(*node); + LogInfo::MapleLogger() << "---------- preds ----------" << "\n"; + for (auto pred : node->GetPreds()) { + depAnalysis->DumpDepLink(*pred, &(pred->GetFrom())); + } + LogInfo::MapleLogger() << "---------- succs ----------" << "\n"; + for (auto succ : node->GetSuccs()) { + depAnalysis->DumpDepLink(*succ, &(succ->GetTo())); + } + LogInfo::MapleLogger() << "---------------------------" << "\n"; + } +} + +/* For every node of nodes, dump it's schedule time according simulate type and instruction information. */ +void AArch64Schedule::DumpScheduleResult(const MapleVector &nodes, SimulateType type) const { + for (auto node : nodes) { + LogInfo::MapleLogger() << "cycle[ "; + switch (type) { + case kListSchedule: + LogInfo::MapleLogger() << node->GetSchedCycle(); + break; + case kBruteForce: + LogInfo::MapleLogger() << node->GetBruteForceSchedCycle(); + break; + case kSimulateOnly: + LogInfo::MapleLogger() << node->GetSimulateCycle(); + break; + } + LogInfo::MapleLogger() << " ] "; + node->GetInsn()->Dump(); + LogInfo::MapleLogger() << "\n"; + } +} + +/* Print bb's dependence dot graph information to a file. 
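GenerateDot below saves std::cout's stream buffer, points std::cout at the dot file's buffer for the duration of the dump, and restores it before returning, so any logging routed through std::cout during the dump ends up in the file. The same idiom in isolation looks like this; the file name and the emitted edge are only placeholders.

```cpp
#include <fstream>
#include <iostream>

int main() {
  std::ofstream dotFile("example_dep_graph.dot");   // placeholder file name
  if (!dotFile.is_open()) {
    return 1;
  }
  std::streambuf *saved = std::cout.rdbuf();        // keep the original buffer
  std::cout.rdbuf(dotFile.rdbuf());                 // std::cout now writes into the file
  std::cout << "digraph {\n"
            << "  insn1 -> insn2 [label=\"2\"];\n"  // edge annotated with its latency
            << "}\n";
  std::cout.rdbuf(saved);                           // restore normal output
  return 0;
}
```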
*/ +void AArch64Schedule::GenerateDot(const BB &bb, const MapleVector &nodes) const { + std::streambuf *coutBuf = std::cout.rdbuf(); /* keep original cout buffer */ + std::ofstream dgFile; + std::streambuf *buf = dgFile.rdbuf(); + std::cout.rdbuf(buf); + + /* construct the file name */ + std::string fileName; + fileName.append(phaseName); + fileName.append("_"); + fileName.append(cgFunc.GetName()); + fileName.append("_BB"); + auto str = std::to_string(bb.GetId()); + fileName.append(str); + fileName.append("_dep_graph.dot"); + + dgFile.open(fileName.c_str(), std::ios::trunc); + if (!dgFile.is_open()) { + LogInfo::MapleLogger(kLlWarn) << "fileName:" << fileName << " open failure.\n"; + return; + } + dgFile << "digraph {\n"; + for (auto node : nodes) { + for (auto succ : node->GetSuccs()) { + dgFile << "insn" << node->GetInsn() << " -> " << "insn" << succ->GetTo().GetInsn(); + dgFile << " ["; + if (succ->GetDepType() == kDependenceTypeTrue) { + dgFile << "color=red,"; + } + dgFile << "label= \"" << succ->GetLatency() << "\""; + dgFile << "];\n"; + } + } + + for (auto node : nodes) { + MOperator mOp = node->GetInsn()->GetMachineOpcode(); + const AArch64MD *md = &AArch64CG::kMd[mOp]; + dgFile << "insn" << node->GetInsn() << "["; + dgFile << "shape=box,label= \" " << node->GetInsn()->GetId() << ":\n"; + dgFile << "{ "; + dgFile << md->name << "\n"; + dgFile << "}\"];\n"; + } + dgFile << "}\n"; + dgFile.flush(); + dgFile.close(); + std::cout.rdbuf(coutBuf); +} + +/* Do brute force scheduling and dump scheduling information */ +void AArch64Schedule::BruteForceScheduling(const BB &bb) { + LogInfo::MapleLogger() << "\n\n$$ Function: " << cgFunc.GetName(); + LogInfo::MapleLogger() << "\n BB id = " << bb.GetId() << "; nodes.size = " << nodes.size() << "\n"; + + constexpr uint32 maxBruteForceNum = 50; + if (nodes.size() < maxBruteForceNum) { + GenerateDot(bb, nodes); + uint32 maxBruteForceCycle = DoBruteForceSchedule(); + MapleVector bruteNodes = nodes; + uint32 maxSchedCycle = DoSchedule(); + if (maxBruteForceCycle < maxSchedCycle) { + LogInfo::MapleLogger() << "maxBruteForceCycle = " << maxBruteForceCycle << "; maxSchedCycle = "; + LogInfo::MapleLogger() << maxSchedCycle << "\n"; + LogInfo::MapleLogger() << "\n ## Dump dependence graph ## " << "\n"; + DumpDepGraph(nodes); + LogInfo::MapleLogger() << "\n ** Dump bruteForce scheduling result." << "\n"; + DumpScheduleResult(bruteNodes, kBruteForce); + LogInfo::MapleLogger() << "\n ^^ Dump list scheduling result." << "\n"; + DumpScheduleResult(nodes, kListSchedule); + } + } else { + LogInfo::MapleLogger() << "Skip BruteForce scheduling." << "\n"; + DoSchedule(); + } +} + +/* Do simulate scheduling and dump scheduling information */ +void AArch64Schedule::SimulateScheduling(const BB &bb) { + uint32 originCycle = SimulateOnly(); + MapleVector oldNodes = nodes; + uint32 schedCycle = DoSchedule(); + if (originCycle < schedCycle) { + LogInfo::MapleLogger() << "Worse cycle [ " << (schedCycle - originCycle) << " ]; "; + LogInfo::MapleLogger() << "originCycle = " << originCycle << "; schedCycle = "; + LogInfo::MapleLogger() << schedCycle << "; nodes.size = " << nodes.size(); + LogInfo::MapleLogger() << "; $$ Function: " << cgFunc.GetName(); + LogInfo::MapleLogger() << "; BB id = " << bb.GetId() << "\n"; + LogInfo::MapleLogger() << "\n ** Dump original result." << "\n"; + DumpScheduleResult(oldNodes, kSimulateOnly); + LogInfo::MapleLogger() << "\n ^^ Dump list scheduling result." 
<< "\n"; + DumpScheduleResult(nodes, kListSchedule); + } else if (originCycle > schedCycle) { + LogInfo::MapleLogger() << "Advance cycle [ " << (originCycle - schedCycle) << " ]; "; + LogInfo::MapleLogger() << "originCycle = " << originCycle << "; schedCycle = "; + LogInfo::MapleLogger() << schedCycle << "; nodes.size = " << nodes.size(); + LogInfo::MapleLogger() << "; $$ Function: " << cgFunc.GetName(); + LogInfo::MapleLogger() << "; BB id = " << bb.GetId() << "\n"; + } else { + LogInfo::MapleLogger() << "Equal cycle [ 0 ]; originCycle = " << originCycle; + LogInfo::MapleLogger() << " ], ignore. nodes.size = " << nodes.size() << "\n"; + } +} + +/* + * A local list scheduling. + * Schedule insns in basic blocks. + */ +void AArch64Schedule::ListScheduling(bool beforeRA) { + InitIDAndLoc(); + + mad = Globals::GetInstance()->GetMAD(); + depAnalysis = memPool.New(cgFunc, memPool, *mad, beforeRA); + + FOR_ALL_BB(bb, &cgFunc) { + depAnalysis->Run(*bb, nodes); + + ClinitPairOpt(); + MemoryAccessPairOpt(); + if (LIST_SCHED_DUMP_REF) { + GenerateDot(*bb, nodes); + DumpDepGraph(nodes); + } + if (beforeRA) { + RegPressure::SetMaxRegClassNum(kRegisterLast); + RegPressureScheduling(*bb, nodes); + } else { + if (CGOptions::IsDruteForceSched()) { + BruteForceScheduling(*bb); + } else if (CGOptions::IsSimulateSched()) { + SimulateScheduling(*bb); + } else { + DoSchedule(); + } + } + + FinalizeScheduling(*bb, *depAnalysis); + } +} +} /* namespace maplebe */ diff --git a/src/maple_be/src/cg/aarch64/aarch64_strldr.cpp b/src/maple_be/src/cg/aarch64/aarch64_strldr.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8a755c475d8474f19bec888146479ff9d8e78afd --- /dev/null +++ b/src/maple_be/src/cg/aarch64/aarch64_strldr.cpp @@ -0,0 +1,326 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +#include "aarch64_strldr.h" +#include "aarch64_reaching.h" +#include "aarch64_cgfunc.h" +#include "common_utils.h" + +namespace maplebe { +using namespace maple; + +static MOperator SelectMovMop(bool isFloatOrSIMD, bool is64Bit) { + return isFloatOrSIMD ? (is64Bit ? MOP_xvmovd : MOP_xvmovs) + : (is64Bit ? MOP_xmovrr : MOP_wmovrr); +} + +void AArch64StoreLoadOpt::Run() { + /* if the number of BB is too large, don't optimize. */ + if (cgFunc.NumBBs() > kMaxBBNum || cgFunc.GetRD()->GetMaxInsnNO() > kMaxInsnNum) { + return; + } + DoStoreLoadOpt(); +} + +/* + * Transfer: store x100, [MEM] + * ... // May exist branches. + * load x200, [MEM] + * ==> + * OPT_VERSION_STR_LIVE: + * store x100, [MEM] + * ... // May exist branches. if x100 not dead here. + * mov x200, x100 + * OPT_VERSION_STR_DIE: + * store x100, [MEM] + * mov x9000(new reg), x100 + * ... // May exist branches. if x100 dead here. + * mov x200, x9000 + * Params: + * strInsn: indicate store insn. + * strSrcIdx: index of source register operand of store insn. 
(x100 in this example) + * memSeq: represent first memOpreand or second memOperand + * memUseInsnSet: insns using memOperand + */ +void AArch64StoreLoadOpt::DoLoadToMoveTransfer(Insn &strInsn, short strSrcIdx, + short memSeq, const InsnSet &memUseInsnSet) { + /* stp instruction need two registers, str only need one register */ + ASSERT(strSrcIdx < kDivide2, "CG internal error."); + /* Find x100's definition insn. */ + InsnSet regDefInsnSet = cgFunc.GetRD()->FindDefForRegOpnd(strInsn, strSrcIdx); + ASSERT(!regDefInsnSet.empty(), "RegOperand is used before defined"); + if (regDefInsnSet.size() != 1) { + return; + } + for (auto *ldrInsn : memUseInsnSet) { + if (!ldrInsn->IsLoad() || (ldrInsn->GetResultNum() > 1) || ldrInsn->GetBB()->IsCleanup()) { + continue; + } + + /* ldr x200, [mem], mem index is 1, x200 index is 0 */ + InsnSet memDefInsnSet = cgFunc.GetRD()->FindDefForMemOpnd(*ldrInsn, kInsnSecondOpnd); + ASSERT(!memDefInsnSet.empty(), "load insn should have definitions."); + /* If load has multiple definition, continue. */ + if (memDefInsnSet.size() > 1) { + continue; + } + + Operand &resOpnd = ldrInsn->GetOperand(kInsnFirstOpnd); + Operand &srcOpnd = strInsn.GetOperand(strSrcIdx); + ASSERT(resOpnd.GetSize() == srcOpnd.GetSize(), "For stack location, the size of src and dst should be same."); + + auto &resRegOpnd = static_cast(resOpnd); + auto &srcRegOpnd = static_cast(srcOpnd); + if (resRegOpnd.GetRegisterType() != srcRegOpnd.GetRegisterType()) { + continue; + } + + /* Check if use operand of store is live at load insn. */ + if (cgFunc.GetRD()->RegIsLiveBetweenInsn(srcRegOpnd.GetRegisterNumber(), strInsn, *ldrInsn)) { + GenerateMoveLiveInsn(resRegOpnd, srcRegOpnd, *ldrInsn, strInsn, memSeq); + } else { + GenerateMoveDeadInsn(resRegOpnd, srcRegOpnd, *ldrInsn, strInsn, memSeq); + } + + if (CG_DEBUG_FUNC(&cgFunc)) { + LogInfo::MapleLogger() << "Do store-load optimization 1: str version"; + LogInfo::MapleLogger() << cgFunc.GetName() << '\n'; + LogInfo::MapleLogger() << "Store insn: "; + strInsn.Dump(); + LogInfo::MapleLogger() << "Load insn: "; + ldrInsn->Dump(); + } + } +} + +void AArch64StoreLoadOpt::GenerateMoveLiveInsn(RegOperand &resRegOpnd, RegOperand &srcRegOpnd, + Insn &ldrInsn, Insn &strInsn, short memSeq) { + MOperator movMop = SelectMovMop(resRegOpnd.IsOfFloatOrSIMDClass(), resRegOpnd.GetSize() == k64BitSize); + Insn *movInsn = nullptr; + if (str2MovMap[&strInsn][memSeq] != nullptr) { + Insn *movInsnOfStr = str2MovMap[&strInsn][memSeq]; + auto &vregOpnd = static_cast(movInsnOfStr->GetOperand(kInsnFirstOpnd)); + movInsn = &cgFunc.GetCG()->BuildInstruction(movMop, resRegOpnd, vregOpnd); + } else { + movInsn = &cgFunc.GetCG()->BuildInstruction(movMop, resRegOpnd, srcRegOpnd); + } + movInsn->SetId(ldrInsn.GetId()); + ldrInsn.GetBB()->ReplaceInsn(ldrInsn, *movInsn); + /* Add comment. */ + MapleString newComment = ldrInsn.GetComment(); + if (strInsn.IsStorePair()) { + newComment += "; stp-load live version."; + } else { + newComment += "; str-load live version."; + } + movInsn->SetComment(newComment); + cgFunc.GetRD()->InitGenUse(*ldrInsn.GetBB(), false); +} + +void AArch64StoreLoadOpt::GenerateMoveDeadInsn(RegOperand &resRegOpnd, RegOperand &srcRegOpnd, + Insn &ldrInsn, Insn &strInsn, short memSeq) { + Insn *newMovInsn = nullptr; + RegOperand *vregOpnd = nullptr; + + if (str2MovMap[&strInsn][memSeq] == nullptr) { + RegType regTy = srcRegOpnd.IsOfFloatOrSIMDClass() ? kRegTyFloat : kRegTyInt; + regno_t vRegNO = + cgFunc.NewVReg(regTy, srcRegOpnd.GetSize() <= k32BitSize ? 
k4ByteSize : k8ByteSize); + /* generate a new vreg, check if the size of DataInfo is big enough */ + if (vRegNO >= cgFunc.GetRD()->GetRegSize(*strInsn.GetBB())) { + cgFunc.GetRD()->EnlargeRegCapacity(vRegNO); + } + vregOpnd = &cgFunc.CreateVirtualRegisterOperand(vRegNO); + MOperator newMop = SelectMovMop(resRegOpnd.IsOfFloatOrSIMDClass(), resRegOpnd.GetSize() == k64BitSize); + newMovInsn = &cgFunc.GetCG()->BuildInstruction(newMop, *vregOpnd, srcRegOpnd); + newMovInsn->SetId(strInsn.GetId() + memSeq + 1); + strInsn.GetBB()->InsertInsnAfter(strInsn, *newMovInsn); + str2MovMap[&strInsn][memSeq] = newMovInsn; + /* update DataInfo */ + cgFunc.GetRD()->UpdateInOut(*strInsn.GetBB(), true); + } else { + newMovInsn = str2MovMap[&strInsn][memSeq]; + vregOpnd = &static_cast(newMovInsn->GetOperand(kInsnFirstOpnd)); + } + MOperator movMop = SelectMovMop(resRegOpnd.IsOfFloatOrSIMDClass(), resRegOpnd.GetSize() == k64BitSize); + Insn &movInsn = cgFunc.GetCG()->BuildInstruction(movMop, resRegOpnd, *vregOpnd); + movInsn.SetId(ldrInsn.GetId()); + ldrInsn.GetBB()->ReplaceInsn(ldrInsn, movInsn); + + /* Add comment. */ + MapleString newComment = ldrInsn.GetComment(); + if (strInsn.IsStorePair()) { + newComment += "; stp-load die version."; + } else { + newComment += "; str-load die version."; + } + movInsn.SetComment(newComment); + cgFunc.GetRD()->InitGenUse(*ldrInsn.GetBB(), false); +} + +/* + * Transfer: store wzr, [MEM] + * ... // May exist branches. + * load x200, [MEM] + * ==> + * OPT_VERSION_STP_ZERO / OPT_VERSION_STR_ZERO: + * store wzr, [MEM] + * ... // May exist branches. if x100 not dead here. + * mov x200, wzr + * + * Params: + * stInsn: indicate store insn. + * strSrcIdx: index of source register operand of store insn. (wzr in this example) + * memUseInsnSet: insns using memOperand + */ +void AArch64StoreLoadOpt::DoLoadZeroToMoveTransfer(const Insn &strInsn, short strSrcIdx, + const InsnSet &memUseInsnSet) const { + /* comment for strInsn should be only added once */ + for (auto *ldrInsn : memUseInsnSet) { + /* Currently we don't support useInsn is ldp insn. */ + if (!ldrInsn->IsLoad() || ldrInsn->GetResultNum() > 1) { + continue; + } + /* ldr reg, [mem], the index of [mem] is 1 */ + InsnSet defInsnForUseInsns = cgFunc.GetRD()->FindDefForMemOpnd(*ldrInsn, 1); + /* If load has multiple definition, continue. */ + if (defInsnForUseInsns.size() > 1) { + continue; + } + + auto &resOpnd = ldrInsn->GetOperand(0); + auto &srcOpnd = strInsn.GetOperand(strSrcIdx); + + ASSERT(resOpnd.GetSize() == srcOpnd.GetSize(), "For stack location, the size of src and dst should be same."); + RegOperand &resRegOpnd = static_cast(resOpnd); + MOperator movMop = SelectMovMop(resRegOpnd.IsOfFloatOrSIMDClass(), resRegOpnd.GetSize() == k64BitSize); + Insn &movInsn = cgFunc.GetCG()->BuildInstruction(movMop, resOpnd, srcOpnd); + movInsn.SetId(ldrInsn->GetId()); + ldrInsn->GetBB()->ReplaceInsn(*ldrInsn, movInsn); + + /* Add comment. */ + MapleString newComment = ldrInsn->GetComment(); + newComment += ", str-load zero version"; + movInsn.SetComment(newComment); + } +} + +bool AArch64StoreLoadOpt::CheckStoreOpCode(MOperator opCode) const { + switch (opCode) { + case MOP_wstr: + case MOP_xstr: + case MOP_sstr: + case MOP_dstr: + case MOP_wstp: + case MOP_xstp: + case MOP_sstp: + case MOP_dstp: + case MOP_wstrb: + case MOP_wstrh: + return true; + default: + return false; + } +} + +/* + * Optimize: store x100, [MEM] + * ... // May exist branches. 
+ * load x200, [MEM] + * ==> + * OPT_VERSION_STP_LIVE / OPT_VERSION_STR_LIVE: + * store x100, [MEM] + * ... // May exist branches. if x100 not dead here. + * mov x200, x100 + * OPT_VERSION_STP_DIE / OPT_VERSION_STR_DIE: + * store x100, [MEM] + * mov x9000(new reg), x100 + * ... // May exist branches. if x100 dead here. + * mov x200, x9000 + * + * Note: x100 may be wzr/xzr registers. + */ +void AArch64StoreLoadOpt::DoStoreLoadOpt() { + FOR_ALL_BB(bb, &cgFunc) { + FOR_BB_INSNS(insn, bb) { + if (!insn->IsMachineInstruction() || !insn->IsStore() || !CheckStoreOpCode(insn->GetMachineOpcode())) { + continue; + } + if (insn->IsStorePair()) { + ProcessStrPair(*insn); + continue; + } + ProcessStr(*insn); + } + } +} + +void AArch64StoreLoadOpt::ProcessStrPair(Insn &insn) { + const short memIndex = 2; + short regIndex = 0; + Operand &opnd = insn.GetOperand(memIndex); + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + if ((base == nullptr) || !(cgFunc.GetRD()->IsFrameReg(*base))) { + return; + } + ASSERT(memOpnd.GetIndexRegister() == nullptr, "frame MemOperand must not be exist register index"); + InsnSet memUseInsnSet; + for (int i = 0; i != kMaxMovNum; ++i) { + memUseInsnSet.clear(); + if (i == 0) { + regIndex = 0; + memUseInsnSet = cgFunc.GetRD()->FindUseForMemOpnd(insn, memIndex); + } else { + regIndex = 1; + memUseInsnSet = cgFunc.GetRD()->FindUseForMemOpnd(insn, memIndex, true); + } + if (memUseInsnSet.empty()) { + return; + } + auto ®Opnd = static_cast(insn.GetOperand(regIndex)); + if (regOpnd.IsZeroRegister()) { + DoLoadZeroToMoveTransfer(insn, regIndex, memUseInsnSet); + } else { + DoLoadToMoveTransfer(insn, regIndex, i, memUseInsnSet); + } + } +} + +void AArch64StoreLoadOpt::ProcessStr(Insn &insn) { + /* str x100, [mem], mem index is 1, x100 index is 0; */ + const short memIndex = 1; + const short regIndex = 0; + Operand &opnd = insn.GetOperand(memIndex); + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + if ((base == nullptr) || !(cgFunc.GetRD()->IsFrameReg(*base))) { + return; + } + ASSERT(memOpnd.GetIndexRegister() == nullptr, "frame MemOperand must not be exist register index"); + + InsnSet memUseInsnSet = cgFunc.GetRD()->FindUseForMemOpnd(insn, memIndex); + if (memUseInsnSet.empty()) { + return; + } + + auto *regOpnd = static_cast(insn.GetOpnd(regIndex)); + CHECK_NULL_FATAL(regOpnd); + if (regOpnd->IsZeroRegister()) { + DoLoadZeroToMoveTransfer(insn, regIndex, memUseInsnSet); + } else { + DoLoadToMoveTransfer(insn, regIndex, 0, memUseInsnSet); + } +} +} /* namespace maplebe */ diff --git a/src/maple_be/src/cg/cfgo.cpp b/src/maple_be/src/cg/cfgo.cpp new file mode 100644 index 0000000000000000000000000000000000000000..91c1ea81db93678eb2476af240d14e5bcc57059c --- /dev/null +++ b/src/maple_be/src/cg/cfgo.cpp @@ -0,0 +1,773 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
+ */ +#include "cfgo.h" +#include "cgbb.h" +#include "cg.h" +#include "aarch64_insn.h" +#include "mpl_logging.h" + +/* + * This phase traverses all basic block of cgFunc and finds special + * basic block patterns, like continuous fallthrough basic block, continuous + * uncondition jump basic block, unreachable basic block and empty basic block, + * then do basic mergering, basic block placement transformations, + * unnecessary jumps elimination, and remove unreachable or empty basic block. + * This optimization is done on control flow graph basis. + */ +namespace maplebe { +using namespace maple; + +#define CFGO_DUMP CG_DEBUG_FUNC(cgFunc) + +void CFGOptimizer::InitOptimizePatterns() { + /* Initialize cfg optimization patterns */ + diffPassPatterns.push_back(memPool->New(*cgFunc)); + diffPassPatterns.push_back(memPool->New(*cgFunc)); + diffPassPatterns.push_back(memPool->New(*cgFunc)); + diffPassPatterns.push_back(memPool->New(*cgFunc)); + diffPassPatterns.push_back(memPool->New(*cgFunc)); + diffPassPatterns.push_back(memPool->New(*cgFunc)); +} + +/* return true if to is put after from and there is no real insns between from and to, */ +bool ChainingPattern::NoInsnBetween(const BB &from, const BB &to) const { + const BB *bb = nullptr; + for (bb = from.GetNext(); bb != nullptr && bb != &to && bb != cgFunc->GetLastBB(); bb = bb->GetNext()) { + if (!bb->IsEmptyOrCommentOnly() || bb->IsUnreachable() || bb->GetKind() != BB::kBBFallthru) { + return false; + } + } + return (bb == &to); +} + +/* return true if insns in bb1 and bb2 are the same except the last goto insn. */ +bool ChainingPattern::DoSameThing(const BB &bb1, const Insn &last1, const BB &bb2, const Insn &last2) const { + const Insn *insn1 = bb1.GetFirstInsn(); + const Insn *insn2 = bb2.GetFirstInsn(); + while (insn1 != nullptr && insn1 != last1.GetNext() && insn2 != nullptr && insn2 != last2.GetNext()) { + if (!insn1->IsMachineInstruction()) { + insn1 = insn1->GetNext(); + continue; + } + if (!insn2->IsMachineInstruction()) { + insn2 = insn2->GetNext(); + continue; + } + if (insn1->GetMachineOpcode() != insn2->GetMachineOpcode()) { + return false; + } + uint32 opndNum = insn1->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &op1 = insn1->GetOperand(i); + Operand &op2 = insn2->GetOperand(i); + if (&op1 == &op2) { + continue; + } + if (!op1.Equals(op2)) { + return false; + } + } + insn1 = insn1->GetNext(); + insn2 = insn2->GetNext(); + } + return (insn1 == last1.GetNext() && insn2 == last2.GetNext()); +} + +/* + * BB2 can be merged into BB1, if + * 1. BB1's kind is fallthrough; + * 2. BB2 has only one predecessor which is BB1 and BB2 is not the lastbb + * 3. 
BB2 is neither catch BB nor switch case BB + */ +bool ChainingPattern::MergeFallthuBB(BB &curBB) { + BB *sucBB = curBB.GetNext(); + if (sucBB == nullptr || + IsLabelInLSDAOrSwitchTable(sucBB->GetLabIdx()) || + !cgFunc->GetTheCFG()->CanMerge(curBB, *sucBB)) { + return false; + } + Log(curBB.GetId()); + if (checkOnly) { + return false; + } + if (sucBB == cgFunc->GetLastBB()) { + cgFunc->SetLastBB(curBB); + } + cgFunc->GetTheCFG()->MergeBB(curBB, *sucBB, *cgFunc); + keepPosition = true; + return true; +} + +bool ChainingPattern::MergeGotoBB(BB &curBB, BB &sucBB) { + Log(curBB.GetId()); + if (checkOnly) { + return false; + } + cgFunc->GetTheCFG()->MergeBB(curBB, sucBB, *cgFunc); + keepPosition = true; + return true; +} + +bool ChainingPattern::MoveSuccBBAsCurBBNext(BB &curBB, BB &sucBB) { + /* + * without the judge below, there is + * Assembler Error: CFI state restore without previous remember + */ + if (sucBB.GetFirstInsn() != nullptr && sucBB.GetFirstInsn()->IsCfiInsn()) { + return false; + } + Log(curBB.GetId()); + if (checkOnly) { + return false; + } + /* put sucBB as curBB's next. */ + ASSERT(sucBB.GetPrev() != nullptr, "the target of current goto BB will not be the first bb"); + sucBB.GetPrev()->SetNext(sucBB.GetNext()); + if (sucBB.GetNext() != nullptr) { + sucBB.GetNext()->SetPrev(sucBB.GetPrev()); + } + sucBB.SetNext(curBB.GetNext()); + ASSERT(curBB.GetNext() != nullptr, "current goto BB will not be the last bb"); + curBB.GetNext()->SetPrev(&sucBB); + sucBB.SetPrev(&curBB); + curBB.SetNext(&sucBB); + curBB.RemoveInsn(*curBB.GetLastInsn()); + curBB.SetKind(BB::kBBFallthru); + return true; +} + +bool ChainingPattern::RemoveGotoInsn(BB &curBB, BB &sucBB) { + Log(curBB.GetId()); + if (checkOnly) { + return false; + } + if (&sucBB != curBB.GetNext()) { + ASSERT(curBB.GetNext() != nullptr, "nullptr check"); + curBB.RemoveSuccs(sucBB); + curBB.PushBackSuccs(*curBB.GetNext()); + curBB.GetNext()->PushBackPreds(curBB); + sucBB.RemovePreds(curBB); + } + curBB.RemoveInsn(*curBB.GetLastInsn()); + curBB.SetKind(BB::kBBFallthru); + return true; +} + +bool ChainingPattern::ClearCurBBAndResetTargetBB(BB &curBB, BB &sucBB) { + if (curBB.GetFirstInsn() != nullptr && curBB.GetFirstInsn()->IsCfiInsn()) { + return false; + } + Insn *brInsn = nullptr; + for (brInsn = curBB.GetLastInsn(); brInsn != nullptr; brInsn = brInsn->GetPrev()) { + if (brInsn->IsGoto()) { + break; + } + } + ASSERT(brInsn != nullptr, "goto BB has no branch"); + BB *newTarget = sucBB.GetPrev(); + ASSERT(newTarget != nullptr, "get prev bb failed in ChainingPattern::ClearCurBBAndResetTargetBB"); + Insn *last1 = newTarget->GetLastInsn(); + if (newTarget->GetKind() == BB::kBBGoto) { + Insn *br = nullptr; + for (br = newTarget->GetLastInsn(); br != newTarget->GetFirstInsn()->GetPrev(); br = br->GetPrev()) { + if (br->IsGoto()) { + break; + } + } + ASSERT(br != nullptr, "goto BB has no branch"); + last1 = br->GetPrev(); + } + if (last1 == nullptr || !DoSameThing(*newTarget, *last1, curBB, *brInsn->GetPrev())) { + return false; + } + + Log(curBB.GetId()); + if (checkOnly) { + return false; + } + + LabelIdx tgtLabIdx = newTarget->GetLabIdx(); + if (newTarget->GetLabIdx() == MIRLabelTable::GetDummyLabel()) { + tgtLabIdx = cgFunc->CreateLabel(); + newTarget->AddLabel(tgtLabIdx); + } + LabelOperand &brTarget = cgFunc->GetOrCreateLabelOperand(tgtLabIdx); + brInsn->SetOperand(0, brTarget); + curBB.RemoveInsnSequence(*curBB.GetFirstInsn(), *brInsn->GetPrev()); + + curBB.RemoveFromSuccessorList(sucBB); + curBB.PushBackSuccs(*newTarget); + 
sucBB.RemoveFromPredecessorList(curBB); + newTarget->PushBackPreds(curBB); + + sucBB.GetPrev()->SetUnreachable(false); + keepPosition = true; + return true; +} + +/* + * Following optimizations are performed: + * 1. Basic block merging + * 2. unnecessary jumps elimination + * 3. Remove duplicates Basic block. + */ +bool ChainingPattern::Optimize(BB &curBB) { + if (curBB.GetKind() == BB::kBBFallthru) { + return MergeFallthuBB(curBB); + } + + if (curBB.GetKind() == BB::kBBGoto && !curBB.IsEmpty()) { + BB *sucBB = cgFunc->GetTheCFG()->GetTargetSuc(curBB); + /* + * BB2 can be merged into BB1, if + * 1. BB1 ends with a goto; + * 2. BB2 has only one predecessor which is BB1 + * 3. BB2 is of goto kind. Otherwise, the original fall through will be broken + * 4. BB2 is neither catch BB nor switch case BB + */ + if (sucBB == nullptr) { + return false; + } + if (sucBB->GetKind() == BB::kBBGoto && + !IsLabelInLSDAOrSwitchTable(sucBB->GetLabIdx()) && + cgFunc->GetTheCFG()->CanMerge(curBB, *sucBB)) { + return MergeGotoBB(curBB, *sucBB); + } else if (sucBB != &curBB && + curBB.GetNext() != sucBB && + !sucBB->IsPredecessor(*sucBB->GetPrev()) && + !(sucBB->GetNext() != nullptr && + sucBB->GetNext()->IsPredecessor(*sucBB)) && + !IsLabelInLSDAOrSwitchTable(sucBB->GetLabIdx()) && + sucBB->GetEhSuccs().empty() && + sucBB->GetKind() != BB::kBBThrow) { + return MoveSuccBBAsCurBBNext(curBB, *sucBB); + } + /* + * Last goto instruction can be removed, if: + * 1. The goto target is physically the next one to current BB. + */ + else if (sucBB == curBB.GetNext() || + (NoInsnBetween(curBB, *sucBB) && !IsLabelInLSDAOrSwitchTable(curBB.GetNext()->GetLabIdx()))) { + return RemoveGotoInsn(curBB, *sucBB); + } + /* + * Clear curBB and target it to sucBB->GetPrev() + * if sucBB->GetPrev() and curBB's insns are the same. + * + * curBB: curBB: + * insn_x0 b prevbb + * b sucBB ... + * ... ==> prevbb: + * prevbb: insn_x0 + * insn_x0 sucBB: + * sucBB: + */ + else if (sucBB != curBB.GetNext() && + !curBB.IsSoloGoto() && + !IsLabelInLSDAOrSwitchTable(curBB.GetLabIdx()) && + sucBB->GetKind() == BB::kBBReturn && + sucBB->GetPreds().size() > 1 && + sucBB->GetPrev() != nullptr && + sucBB->IsPredecessor(*sucBB->GetPrev()) && + (sucBB->GetPrev()->GetKind() == BB::kBBFallthru || sucBB->GetPrev()->GetKind() == BB::kBBGoto)) { + return ClearCurBBAndResetTargetBB(curBB, *sucBB); + } + } + return false; +} + +/* + * curBB: curBB: + * insn_x0 insn_x0 + * b targetBB b BB + * ... ==> ... + * targetBB: targetBB: + * b BB b BB + * ... ... + * BB: BB: + * *------------------------------ + * curBB: curBB: + * insn_x0 insn_x0 + * cond_br brBB cond_br BB + * ... ... + * brBB: ==> brBB: + * b BB b BB + * ... ... + * BB: BB: + * + * conditions: + * 1. 
only goto and comment in brBB; + */ +bool SequentialJumpPattern::Optimize(BB &curBB) { + if (curBB.IsUnreachable()) { + return false; + } + if (curBB.GetKind() == BB::kBBGoto && !curBB.IsEmpty()) { + BB *sucBB = cgFunc->GetTheCFG()->GetTargetSuc(curBB); + CHECK_FATAL(sucBB != nullptr, "sucBB is null in SequentialJumpPattern::Optimize"); + if (sucBB->IsSoloGoto() && cgFunc->GetTheCFG()->GetTargetSuc(*sucBB) != nullptr) { + Log(curBB.GetId()); + if (checkOnly) { + return false; + } + cgFunc->GetTheCFG()->RetargetJump(*sucBB, curBB); + SkipSucBB(curBB, *sucBB); + return true; + } + } else if (curBB.GetKind() == BB::kBBIf) { + for (BB *sucBB : curBB.GetSuccs()) { + if (sucBB != curBB.GetNext() && sucBB->IsSoloGoto() && + cgFunc->GetTheCFG()->GetTargetSuc(*sucBB) != nullptr) { + Log(curBB.GetId()); + if (checkOnly) { + return false; + } + cgFunc->GetTheCFG()->RetargetJump(*sucBB, curBB); + SkipSucBB(curBB, *sucBB); + break; + } + } + return true; + } + return false; +} + +/* + * preCond: + * sucBB is one of curBB's successor. + * + * Change curBB's successor to sucBB's successor + */ +void SequentialJumpPattern::SkipSucBB(BB &curBB, BB &sucBB) { + BB *gotoTarget = cgFunc->GetTheCFG()->GetTargetSuc(sucBB); + CHECK_FATAL(gotoTarget != nullptr, "gotoTarget is null in SequentialJumpPattern::SkipSucBB"); + curBB.RemoveSuccs(sucBB); + curBB.PushBackSuccs(*gotoTarget); + sucBB.RemovePreds(curBB); + gotoTarget->PushBackPreds(curBB); + cgFunc->GetTheCFG()->FlushUnReachableStatusAndRemoveRelations(sucBB, *cgFunc); +} + +/* + * Found pattern + * curBB: curBB: + * ... ==> ... + * cond_br brBB cond1_br ftBB + * ftBB: brBB: + * bl throwfunc ... + * brBB: retBB: + * ... ... + * retBB: ftBB: + * ... bl throwfunc + */ +void FlipBRPattern::RelocateThrowBB(BB &curBB) { + BB *ftBB = curBB.GetNext(); + CHECK_FATAL(ftBB != nullptr, "ifBB has a fall through BB"); + CGCFG *theCFG = cgFunc->GetTheCFG(); + CHECK_FATAL(theCFG != nullptr, "nullptr check"); + BB *retBB = theCFG->FindLastRetBB(); + CHECK_FATAL(retBB != nullptr, "must have a return BB"); + if (ftBB->GetKind() != BB::kBBThrow || !ftBB->GetEhSuccs().empty() || + IsLabelInLSDAOrSwitchTable(ftBB->GetLabIdx()) || !retBB->GetEhSuccs().empty()) { + return; + } + BB *brBB = theCFG->GetTargetSuc(curBB); + if (brBB != ftBB->GetNext()) { + return; + } + if (cgFunc->GetEHFunc() != nullptr && cgFunc->GetEHFunc()->GetLSDACallSiteTable() != nullptr) { + const MapleVector &callsiteTable = cgFunc->GetEHFunc()->GetLSDACallSiteTable()->GetCallSiteTable(); + for (size_t i = 0; i < callsiteTable.size(); ++i) { + LSDACallSite *lsdaCallsite = callsiteTable[i]; + BB *endTry = cgFunc->GetBBFromLab2BBMap(lsdaCallsite->csLength.GetEndOffset()->GetLabelIdx()); + BB *startTry = cgFunc->GetBBFromLab2BBMap(lsdaCallsite->csLength.GetStartOffset()->GetLabelIdx()); + if (retBB->GetId() >= startTry->GetId() && retBB->GetId() <= endTry->GetId()) { + if (retBB->GetNext()->GetId() < startTry->GetId() || retBB->GetNext()->GetId() > endTry->GetId() || + curBB.GetId() < startTry->GetId() || curBB.GetId() > endTry->GetId()) { + return; + } + } else { + if ((retBB->GetNext()->GetId() >= startTry->GetId() && retBB->GetNext()->GetId() <= endTry->GetId()) || + (curBB.GetId() >= startTry->GetId() && curBB.GetId() <= endTry->GetId())) { + return; + } + } + } + } + /* get branch insn of curBB */ + Insn *curBBBranchInsn = theCFG->FindLastCondBrInsn(curBB); + CHECK_FATAL(curBBBranchInsn != nullptr, "curBB(it is a kBBif) has no branch"); + + /* Reverse the branch */ + int targetIdx = 1; + MOperator mOp = 
theCFG->GetInsnModifier()->FlipConditionOp(curBBBranchInsn->GetMachineOpcode(), targetIdx); + LabelOperand &brTarget = cgFunc->GetOrCreateLabelOperand(*ftBB); + curBBBranchInsn->SetMOperator(mOp); + curBBBranchInsn->SetOperand(targetIdx, brTarget); + + /* move ftBB after retBB */ + curBB.SetNext(brBB); + brBB->SetPrev(&curBB); + + retBB->GetNext()->SetPrev(ftBB); + ftBB->SetNext(retBB->GetNext()); + ftBB->SetPrev(retBB); + retBB->SetNext(ftBB); +} + +/* + * 1. relocate goto BB + * Found pattern (1) ftBB->GetPreds().size() == 1 + * curBB: curBB: cond1_br target + * ... ==> brBB: + * cond_br brBB ... + * ftBB: targetBB: (ftBB,targetBB) + * goto target (2) ftBB->GetPreds().size() > 1 + * brBB: curBB : cond1_br ftBB + * ... brBB: + * targetBB ... + * ftBB + * targetBB + * + * 2. relocate throw BB in RelocateThrowBB() + */ +bool FlipBRPattern::Optimize(BB &curBB) { + if (curBB.GetKind() == BB::kBBIf && !curBB.IsEmpty()) { + BB *ftBB = curBB.GetNext(); + ASSERT(ftBB != nullptr, "ftBB is null in FlipBRPattern::Optimize"); + BB *brBB = cgFunc->GetTheCFG()->GetTargetSuc(curBB); + ASSERT(brBB != nullptr, "brBB is null in FlipBRPattern::Optimize"); + /* Check if it can be optimized */ + if (ftBB->GetKind() == BB::kBBGoto && ftBB->GetNext() == brBB) { + if (!ftBB->GetEhSuccs().empty()) { + return false; + } + Insn *curBBBranchInsn = nullptr; + for (curBBBranchInsn = curBB.GetLastInsn(); curBBBranchInsn != nullptr; + curBBBranchInsn = curBBBranchInsn->GetPrev()) { + if (curBBBranchInsn->IsBranch()) { + break; + } + } + ASSERT(curBBBranchInsn != nullptr, "FlipBRPattern: curBB has no branch"); + Insn *brInsn = nullptr; + for (brInsn = ftBB->GetLastInsn(); brInsn != nullptr; brInsn = brInsn->GetPrev()) { + if (brInsn->IsGoto()) { + break; + } + } + ASSERT(brInsn != nullptr, "FlipBRPattern: ftBB has no branch"); + + /* Reverse the branch */ + int targetIdx = 1; + MOperator mOp = cgFunc->GetTheCFG()->GetInsnModifier()->FlipConditionOp(curBBBranchInsn->GetMachineOpcode(), + targetIdx); + if (mOp == 0) { + return false; + } + auto it = ftBB->GetSuccsBegin(); + BB *tgtBB = *it; + if (ftBB->GetPreds().size() == 1 && + (ftBB->IsSoloGoto() || + (!IsLabelInLSDAOrSwitchTable(tgtBB->GetLabIdx()) && + cgFunc->GetTheCFG()->CanMerge(*ftBB, *tgtBB)))) { + curBBBranchInsn->SetMOperator(mOp); + Operand &brTarget = brInsn->GetOperand(0); + curBBBranchInsn->SetOperand(targetIdx, brTarget); + /* Insert ftBB's insn at the beginning of tgtBB. 
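The transformation hinges on inverting the sense of the conditional branch so that the fall-through goto can be deleted and curBB falls through to the old branch target. A toy illustration of the condition-flip idea with invented mnemonics; the real pass maps AArch64 machine opcodes through the InsnVisitor's FlipConditionOp rather than this enum.

```cpp
#include <cassert>

/* Toy conditional-branch mnemonics; purely illustrative. */
enum ToyCondBr { kBeq, kBne, kBlt, kBge, kBgt, kBle };

/* Invert the branch condition so that
 *   curBB: beq brBB          curBB: bne tgt
 *   ftBB:  b   tgt     ==>   (ftBB's goto disappears and curBB
 *   brBB:  ...                falls through to brBB)            */
ToyCondBr FlipCondition(ToyCondBr op) {
  switch (op) {
    case kBeq: return kBne;
    case kBne: return kBeq;
    case kBlt: return kBge;
    case kBge: return kBlt;
    case kBgt: return kBle;
    case kBle: return kBgt;
  }
  return op;  /* not reached; silences -Wreturn-type */
}

int main() {
  assert(FlipCondition(kBlt) == kBge);
  assert(FlipCondition(FlipCondition(kBgt)) == kBgt);
  return 0;
}
```

Optimize only performs the rewrite when FlipConditionOp yields a valid opcode (mOp != 0) and the pattern's CFG preconditions hold.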
*/ + if (!ftBB->IsSoloGoto()) { + ftBB->RemoveInsn(*brInsn); + tgtBB->InsertAtBeginning(*ftBB); + } + /* Patch pred and succ lists */ + ftBB->EraseSuccs(it); + ftBB->PushBackSuccs(*brBB); + it = curBB.GetSuccsBegin(); + CHECK_FATAL(*it != nullptr, "nullptr check"); + if (*it == brBB) { + curBB.EraseSuccs(it); + curBB.PushBackSuccs(*tgtBB); + } else { + ++it; + curBB.EraseSuccs(it); + curBB.PushFrontSuccs(*tgtBB); + } + for (it = tgtBB->GetPredsBegin(); it != tgtBB->GetPredsEnd(); ++it) { + if (*it == ftBB) { + tgtBB->ErasePreds(it); + break; + } + } + tgtBB->PushBackPreds(curBB); + for (it = brBB->GetPredsBegin(); it != brBB->GetPredsEnd(); ++it) { + if (*it == &curBB) { + brBB->ErasePreds(it); + break; + } + } + brBB->PushFrontPreds(*ftBB); + /* Remove instructions from ftBB so curBB falls thru to brBB */ + ftBB->SetFirstInsn(nullptr); + ftBB->SetLastInsn(nullptr); + ftBB->SetKind(BB::kBBFallthru); + } else if (!IsLabelInLSDAOrSwitchTable(ftBB->GetLabIdx()) && + !tgtBB->IsPredecessor(*tgtBB->GetPrev())) { + curBBBranchInsn->SetMOperator(mOp); + LabelIdx tgtLabIdx = ftBB->GetLabIdx(); + if (ftBB->GetLabIdx() == MIRLabelTable::GetDummyLabel()) { + tgtLabIdx = cgFunc->CreateLabel(); + ftBB->AddLabel(tgtLabIdx); + } + LabelOperand &brTarget = cgFunc->GetOrCreateLabelOperand(tgtLabIdx); + curBBBranchInsn->SetOperand(targetIdx, brTarget); + curBB.SetNext(brBB); + brBB->SetPrev(&curBB); + ftBB->SetPrev(tgtBB->GetPrev()); + tgtBB->GetPrev()->SetNext(ftBB); + ftBB->SetNext(tgtBB); + tgtBB->SetPrev(ftBB); + + ftBB->RemoveInsn(*brInsn); + ftBB->SetKind(BB::kBBFallthru); + } + } else { + RelocateThrowBB(curBB); + } + } + return false; +} + +/* remove a basic block that contains nothing */ +bool EmptyBBPattern::Optimize(BB &curBB) { + if (curBB.IsUnreachable()) { + return false; + } + /* Empty bb but do not have cleanup label. */ + if (curBB.GetPrev() != nullptr && curBB.GetFirstStmt() != cgFunc->GetCleanupLabel() && + curBB.GetFirstInsn() == nullptr && curBB.GetLastInsn() == nullptr && &curBB != cgFunc->GetLastBB() && + curBB.GetKind() != BB::kBBReturn && !IsLabelInLSDAOrSwitchTable(curBB.GetLabIdx())) { + Log(curBB.GetId()); + if (checkOnly) { + return false; + } + if (cgFunc->GetTheCFG()->GetTargetSuc(curBB) == nullptr) { + ERR(kLncErr, "null ptr check"); + return false; + } + if (cgFunc->GetTheCFG()->GetTargetSuc(curBB)->GetFirstStmt() == cgFunc->GetCleanupLabel()) { + return false; + } + cgFunc->GetTheCFG()->RemoveBB(curBB); + return true; + } + return false; +} + +/* + * remove unreachable BB + * condition: + * 1. unreachable BB can't have cfi instruction when postcfgo. + */ +bool UnreachBBPattern::Optimize(BB &curBB) { + if (curBB.IsUnreachable()) { + Log(curBB.GetId()); + if (checkOnly) { + return false; + } + + /* if curBB in exitbbsvec,return false. */ + EHFunc *ehFunc = cgFunc->GetEHFunc(); + ASSERT(ehFunc != nullptr, "get ehfunc failed in UnreachBBPattern::Optimize"); + if (cgFunc->IsExitBB(curBB)) { + curBB.SetUnreachable(false); + return false; + } + + if (curBB.GetFirstInsn() != nullptr && curBB.GetFirstInsn()->IsCfiInsn()) { + return false; + } + + /* if curBB InLSDA ,replace curBB's label with nextReachableBB before remove it. 
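UnreachBBPattern only deletes blocks that are already flagged unreachable; the underlying notion is ordinary CFG reachability, i.e. a block is dead when no path from the entry block reaches it. A generic sketch of that computation on a toy adjacency-list CFG; this helper is illustrative only, since the real pass trusts the unreachable flag maintained by the CGCFG utilities rather than recomputing it.

```cpp
#include <cstddef>
#include <vector>

/* Toy CFG: succs[i] lists the successors of block i. A depth-first walk from
 * the entry marks every reachable block; whatever stays false is dead code. */
std::vector<bool> ComputeReachable(const std::vector<std::vector<size_t>> &succs, size_t entry) {
  std::vector<bool> reachable(succs.size(), false);
  std::vector<size_t> workList{entry};
  while (!workList.empty()) {
    size_t bb = workList.back();
    workList.pop_back();
    if (reachable[bb]) {
      continue;
    }
    reachable[bb] = true;
    for (size_t succ : succs[bb]) {
      workList.push_back(succ);
    }
  }
  return reachable;
}

int main() {
  /* 0 -> 1 -> 3; block 2 has no predecessor and is therefore unreachable. */
  std::vector<std::vector<size_t>> succs = {{1}, {3}, {3}, {}};
  return ComputeReachable(succs, 0)[2] ? 1 : 0;  /* expect exit code 0 */
}
```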
*/ + if (ehFunc->NeedFullLSDA() && cgFunc->GetTheCFG()->InLSDA(curBB.GetLabIdx(), *ehFunc)) { + /* find nextReachableBB */ + BB *nextReachableBB = nullptr; + for (BB *bb = &curBB; bb != nullptr; bb = bb->GetNext()) { + if (!bb->IsUnreachable()) { + nextReachableBB = bb; + break; + } + } + CHECK_FATAL(nextReachableBB != nullptr, "nextReachableBB not be nullptr"); + if (nextReachableBB->GetLabIdx() == 0) { + LabelIdx labIdx = cgFunc->CreateLabel(); + nextReachableBB->AddLabel(labIdx); + cgFunc->SetLab2BBMap(labIdx, *nextReachableBB); + } + + ehFunc->GetLSDACallSiteTable()->UpdateCallSite(curBB, *nextReachableBB); + } + + ASSERT(curBB.GetPrev() != nullptr, "nullptr check"); + curBB.GetPrev()->SetNext(curBB.GetNext()); + ASSERT(curBB.GetNext() != nullptr, "nullptr check"); + curBB.GetNext()->SetPrev(curBB.GetPrev()); + + /* flush after remove; */ + for (BB *bb : curBB.GetSuccs()) { + bb->RemovePreds(curBB); + cgFunc->GetTheCFG()->FlushUnReachableStatusAndRemoveRelations(*bb, *cgFunc); + } + for (BB *bb : curBB.GetEhSuccs()) { + bb->RemoveEhPreds(curBB); + cgFunc->GetTheCFG()->FlushUnReachableStatusAndRemoveRelations(*bb, *cgFunc); + } + curBB.ClearSuccs(); + curBB.ClearEhSuccs(); + return true; + } + return false; +} + +/* BB_pred1: BB_pred1: + * b curBB insn_x0 + * ... b BB2 + * BB_pred2: ==> ... + * b curBB BB_pred2: + * ... insn_x0 + * curBB: b BB2 + * insn_x0 ... + * b BB2 curBB: + * insn_x0 + * b BB2 + * condition: + * 1. The number of instruct in curBB + * is less than THRESHOLD; + * 2. curBB can't have cfi instruction when postcfgo. + */ +bool DuplicateBBPattern::Optimize(BB &curBB) { + if (curBB.IsUnreachable()) { + return false; + } + if (CGOptions::IsNoDupBB()) { + return false; + } + + /* curBB can't be in try block */ + if (curBB.GetKind() != BB::kBBGoto || IsLabelInLSDAOrSwitchTable(curBB.GetLabIdx()) || + !curBB.GetEhSuccs().empty()) { + return false; + } + +#if TARGARM32 + FOR_BB_INSNS(insn, (&curBB)) { + if (insn->IsPCLoad()) { + return false; + } + } +#endif + /* It is possible curBB jump to itself */ + uint32 numPreds = curBB.NumPreds(); + for (BB *bb : curBB.GetPreds()) { + if (bb == &curBB) { + numPreds--; + } + } + + if (numPreds > 1 && cgFunc->GetTheCFG()->GetTargetSuc(curBB) != nullptr && + cgFunc->GetTheCFG()->GetTargetSuc(curBB)->NumPreds() > 1) { + std::vector candidates; + for (BB *bb : curBB.GetPreds()) { + if (bb->GetKind() == BB::kBBGoto && bb->GetNext() != &curBB && bb != &curBB && !bb->IsEmpty()) { + candidates.push_back(bb); + } + } + if (candidates.empty()) { + return false; + } + if (curBB.NumInsn() <= kThreshold) { + if (curBB.GetFirstInsn() != nullptr && curBB.GetFirstInsn()->IsCfiInsn()) { + return false; + } + Log(curBB.GetId()); + if (checkOnly) { + return false; + } + for (BB *bb : candidates) { + bb->RemoveInsn(*bb->GetLastInsn()); + FOR_BB_INSNS(insn, (&curBB)) { + Insn *clonedInsn = cgFunc->GetTheCFG()->CloneInsn(*insn); + clonedInsn->SetPrev(nullptr); + clonedInsn->SetNext(nullptr); + clonedInsn->SetBB(nullptr); + bb->AppendInsn(*clonedInsn); + } + bb->RemoveSuccs(curBB); + for (BB *item : curBB.GetSuccs()) { + bb->PushBackSuccs(*item); + item->PushBackPreds(*bb); + } + curBB.RemovePreds(*bb); + } + cgFunc->GetTheCFG()->FlushUnReachableStatusAndRemoveRelations(curBB, *cgFunc); + return true; + } + } + return false; +} + +AnalysisResult *CgDoCfgo::Run(CGFunc *cgFunc, CgFuncResultMgr *cgFuncResultMgr) { + (void)cgFuncResultMgr; + MemPool *cfgMemPool = NewMemPool(); + CFGOptimizer *cfgOptimizer = cfgMemPool->New(*cgFunc, *cfgMemPool); + std::string 
funcClass = cgFunc->GetFunction().GetBaseClassName(); + std::string funcName = cgFunc->GetFunction().GetBaseFuncName(); + std::string name = funcClass + funcName; + const std::string phaseName = PhaseName(); + + if (CFGO_DUMP) { + DotGenerator::GenerateDot("before-cfgo", *cgFunc, cgFunc->GetMirModule()); + } + + cfgOptimizer->Run(name); + + if (CFGO_DUMP) { + DotGenerator::GenerateDot("after-cfgo", *cgFunc, cgFunc->GetMirModule()); + } + + return nullptr; +} + +AnalysisResult *CgDoPostCfgo::Run(CGFunc *cgFunc, CgFuncResultMgr *cgFuncResultMgr) { + (void)cgFuncResultMgr; + MemPool *cfgMemPool = NewMemPool(); + CFGOptimizer *cfgOptimizer = cfgMemPool->New(*cgFunc, *cfgMemPool); + std::string funcClass = cgFunc->GetFunction().GetBaseClassName(); + std::string funcName = cgFunc->GetFunction().GetBaseFuncName(); + std::string name = funcClass + funcName; + const std::string phaseName = PhaseName(); + + if (CFGO_DUMP) { + DotGenerator::GenerateDot("before-postcfgo", *cgFunc, cgFunc->GetMirModule()); + } + + cfgOptimizer->Run(name); + + if (CFGO_DUMP) { + DotGenerator::GenerateDot("after-postcfgo", *cgFunc, cgFunc->GetMirModule()); + } + + return nullptr; +} +} /* namespace maplebe */ diff --git a/src/maple_be/src/cg/cg_cfg.cpp b/src/maple_be/src/cg/cg_cfg.cpp index c36627e2b640c049c3b1ca9767e4a19513200676..02cd4347d39c6faf4e9070984c223e9fe9142f67 100644 --- a/src/maple_be/src/cg/cg_cfg.cpp +++ b/src/maple_be/src/cg/cg_cfg.cpp @@ -125,6 +125,22 @@ void CGCFG::BuildCFG() { } } } + +InsnVisitor *CGCFG::insnVisitor; + +void CGCFG::InitInsnVisitor(CGFunc &func) { + insnVisitor = func.NewInsnModifier(); +} + +Insn *CGCFG::CloneInsn(Insn &originalInsn) { + cgFunc->IncTotalNumberOfInstructions(); + return insnVisitor->CloneInsn(originalInsn); +} + +RegOperand *CGCFG::CreateVregFromReg(const RegOperand &pReg) { + return insnVisitor->CreateVregFromReg(pReg); +} + /* * return true if: * mergee has only one predecessor which is merger, or mergee has @@ -338,6 +354,98 @@ void CGCFG::FlushUnReachableStatusAndRemoveRelations(BB &bb, const CGFunc &func) it->ClearEhSuccs(); } } + +void CGCFG::RemoveBB(BB &curBB, bool isGotoIf) { + BB *sucBB = CGCFG::GetTargetSuc(curBB, false, isGotoIf); + if (sucBB != nullptr) { + sucBB->RemovePreds(curBB); + } + BB *fallthruSuc = nullptr; + if (isGotoIf) { + for (BB *succ : curBB.GetSuccs()) { + if (succ == sucBB) { + continue; + } + fallthruSuc = succ; + break; + } + + ASSERT(fallthruSuc == curBB.GetNext(), "fallthru succ should be its next bb."); + if (fallthruSuc != nullptr) { + fallthruSuc->RemovePreds(curBB); + } + } + + for (BB *preBB : curBB.GetPreds()) { + /* + * If curBB is the target of its predecessor, change + * the jump target. 
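RemoveBB has to keep two structures consistent when a block is spliced out: the layout chain (prev/next) and the control-flow edges (preds/succs), while any predecessor that jumped to the removed block is retargeted. A compact sketch of that bookkeeping on toy types; ToyBB, RemoveBlock and the single-successor assumption are simplifications of the real code, which also rewrites the jump label through the InsnVisitor.

```cpp
#include <algorithm>
#include <vector>

/* Toy basic block with the two link structures RemoveBB must keep consistent:
 * layout order (prev/next) and control-flow edges (preds/succs).
 * Field and function names are simplifications, not maplebe's BB interface. */
struct ToyBB {
  int id = 0;
  ToyBB *prev = nullptr;
  ToyBB *next = nullptr;
  std::vector<ToyBB*> preds;
  std::vector<ToyBB*> succs;
};

static void EraseEdge(std::vector<ToyBB*> &list, ToyBB *bb) {
  list.erase(std::remove(list.begin(), list.end(), bb), list.end());
}

/* Splice 'victim' out of the CFG: every predecessor is rewired to victim's
 * single successor, and the layout chain is re-linked around the removed block. */
void RemoveBlock(ToyBB &victim, ToyBB &succ) {
  for (ToyBB *pred : victim.preds) {
    EraseEdge(pred->succs, &victim);
    pred->succs.push_back(&succ);
    succ.preds.push_back(pred);
  }
  EraseEdge(succ.preds, &victim);
  if (victim.prev != nullptr) {
    victim.prev->next = victim.next;
  }
  if (victim.next != nullptr) {
    victim.next->prev = victim.prev;
  }
}

int main() {
  ToyBB a{1}, b{2}, c{3};
  a.next = &b; b.prev = &a; b.next = &c; c.prev = &b;
  a.succs = {&b}; b.preds = {&a}; b.succs = {&c}; c.preds = {&b};
  RemoveBlock(b, c);
  return (a.next == &c && c.preds.size() == 1 && c.preds[0] == &a) ? 0 : 1;
}
```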
+ */ + if (&curBB == GetTargetSuc(*preBB, true, isGotoIf)) { + LabelIdx targetLabel; + if (curBB.GetNext()->GetLabIdx() == 0) { + targetLabel = insnVisitor->GetCGFunc()->CreateLabel(); + curBB.GetNext()->SetLabIdx(targetLabel); + } else { + targetLabel = curBB.GetNext()->GetLabIdx(); + } + insnVisitor->ModifyJumpTarget(targetLabel, *preBB); + } + if (fallthruSuc != nullptr && !fallthruSuc->IsPredecessor(*preBB)) { + preBB->PushBackSuccs(*fallthruSuc); + fallthruSuc->PushBackPreds(*preBB); + } + if (sucBB != nullptr && !sucBB->IsPredecessor(*preBB)) { + preBB->PushBackSuccs(*sucBB); + sucBB->PushBackPreds(*preBB); + } + preBB->RemoveSuccs(curBB); + } + for (BB *ehSucc : curBB.GetEhSuccs()) { + ehSucc->RemoveEhPreds(curBB); + } + for (BB *ehPred : curBB.GetEhPreds()) { + ehPred->RemoveEhSuccs(curBB); + } + curBB.GetNext()->RemovePreds(curBB); + curBB.GetPrev()->SetNext(curBB.GetNext()); + curBB.GetNext()->SetPrev(curBB.GetPrev()); +} + +void CGCFG::RetargetJump(BB &srcBB, BB &targetBB) { + insnVisitor->ModifyJumpTarget(srcBB, targetBB); +} + +BB *CGCFG::GetTargetSuc(BB &curBB, bool branchOnly, bool isGotoIf) { + switch (curBB.GetKind()) { + case BB::kBBGoto: + case BB::kBBIntrinsic: + case BB::kBBIf: { + const Insn* origLastInsn = curBB.GetLastInsn(); + if (isGotoIf && (curBB.GetPrev() != nullptr) && + (curBB.GetKind() == BB::kBBGoto || curBB.GetKind() == BB::kBBIf) && + (curBB.GetPrev()->GetKind() == BB::kBBGoto || curBB.GetPrev()->GetKind() == BB::kBBIf)) { + origLastInsn = curBB.GetPrev()->GetLastInsn(); + } + LabelIdx label = insnVisitor->GetJumpLabel(*origLastInsn); + for (BB *bb : curBB.GetSuccs()) { + if (bb->GetLabIdx() == label) { + return bb; + } + } + break; + } + case BB::kBBFallthru: { + return (branchOnly ? nullptr : curBB.GetNext()); + } + case BB::kBBThrow: + return nullptr; + default: + return nullptr; + } + return nullptr; +} + bool CGCFG::InLSDA(LabelIdx label, const EHFunc &ehFunc) { if (!label || ehFunc.GetLSDACallSiteTable() == nullptr) { return false; @@ -366,6 +474,11 @@ bool CGCFG::InSwitchTable(LabelIdx label, const CGFunc &func) { } return false; } + +bool CGCFG::IsCompareAndBranchInsn(const Insn &insn) const { + return insnVisitor->IsCompareAndBranchInsn(insn); +} + Insn *CGCFG::FindLastCondBrInsn(BB &bb) const { if (bb.GetKind() != BB::kBBIf) { return nullptr; diff --git a/src/maple_be/src/cg/cg_option.cpp b/src/maple_be/src/cg/cg_option.cpp index a5249f9fde3af6c453aabf11affa8be33a3c6456..7395c1cdbde07a2091076ee2aef4b3bae6893037 100644 --- a/src/maple_be/src/cg/cg_option.cpp +++ b/src/maple_be/src/cg/cg_option.cpp @@ -37,21 +37,31 @@ std::unordered_map> CGOptions::cyclePatter std::string CGOptions::skipFrom = ""; std::string CGOptions::skipAfter = ""; std::string CGOptions::dumpFunc = "*"; +std::string CGOptions::globalVarProfile = ""; #ifdef TARGARM32 std::string CGOptions::duplicateAsmFile = ""; #else std::string CGOptions::duplicateAsmFile = "maple/mrt/codetricks/arch/arm64/duplicateFunc.s"; #endif -std::string CGOptions::globalVarProfile = ""; #if TARGAARCH64 bool CGOptions::useBarriersForVolatile = false; #else bool CGOptions::useBarriersForVolatile = true; #endif -bool CGOptions::quiet = true; bool CGOptions::exclusiveEH = false; +bool CGOptions::doEBO = false; +bool CGOptions::doCFGO = false; +bool CGOptions::doICO = false; +bool CGOptions::doStoreLoadOpt = false; +bool CGOptions::doGlobalOpt = false; +bool CGOptions::doPrePeephole = false; +bool CGOptions::doPeephole = false; +bool CGOptions::doSchedule = false; +bool CGOptions::doWriteRefFieldOpt = 
false; +bool CGOptions::dumpOptimizeCommonLog = false; bool CGOptions::checkArrayStore = false; bool CGOptions::doPIC = false; +bool CGOptions::noDupBB = false; bool CGOptions::noCalleeCFI = true; bool CGOptions::emitCyclePattern = false; bool CGOptions::insertYieldPoint = false; @@ -61,8 +71,13 @@ bool CGOptions::nativeOpt = false; bool CGOptions::withDwarf = false; bool CGOptions::lazyBinding = false; bool CGOptions::hotFix = false; +bool CGOptions::debugSched = false; +bool CGOptions::bruteForceSched = false; +bool CGOptions::simulateSched = false; bool CGOptions::genLongCalls = false; bool CGOptions::gcOnly = false; +bool CGOptions::quiet = true; + enum OptionIndex : uint64 { kCGQuiet = kCommonOptionEnd + 1, @@ -71,6 +86,14 @@ enum OptionIndex : uint64 { kCGVerbose, kCGMapleLinker, kCgen, + kEbo, + kCfgo, + kIco, + kSlo, + kGo, + kPrepeep, + kPeep, + kSchedule, kCGNativeOpt, kInsertCall, kTrace, @@ -115,6 +138,9 @@ enum OptionIndex : uint64 { kCGSkipAfter, kCGLazyBinding, kCGHotFix, + kDebugSched, + kBruteForceSched, + kSimulateSched, kLongCalls, }; @@ -198,6 +224,86 @@ const Descriptor kUsage[] = { " --no-hot-fix\n", "mplcg", {} }, + { kEbo, + kEnable, + "", + "ebo", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --ebo \tPerform Extend block optimization\n" + " --no-ebo\n", + "mplcg", + {} }, + { kCfgo, + kEnable, + "", + "cfgo", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --cfgo \tPerform control flow optimization\n" + " --no-cfgo\n", + "mplcg", + {} }, + { kIco, + kEnable, + "", + "ico", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --ico \tPerform if-conversion optimization\n" + " --no-ico\n", + "mplcg", + {} }, + { kSlo, + kEnable, + "", + "storeloadopt", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --storeloadopt \tPerform global store-load optimization\n" + " --no-storeloadopt\n", + "mplcg", + {} }, + { kGo, + kEnable, + "", + "globalopt", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --globalopt \tPerform global optimization\n" + " --no-globalopt\n", + "mplcg", + {} }, + { kPrepeep, + kEnable, + "", + "prepeep", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --prepeep \tPerform peephole optimization before RA\n" + " --no-prepeep\n", + "mplcg", + {} }, + { kPeep, + kEnable, + "", + "peep", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --peep \tPerform peephole optimization after RA\n" + " --no-peep\n", + "mplcg", + {} }, + { kSchedule, + kEnable, + "", + "schedule", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --schedule \tPerform scheduling\n" + " --no-schedule\n", + "mplcg", + {} }, { kCGNativeOpt, kEnable, "", @@ -572,6 +678,36 @@ const Descriptor kUsage[] = { " --no-check-arraystore\n", "mplcg", {} }, + { kDebugSched, + kEnable, + "", + "debug-schedule", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --debug-schedule \tdump scheduling information\n" + " --no-debug-schedule\n", + "mplcg", + {} }, + { kBruteForceSched, + kEnable, + "", + "bruteforce-schedule", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --bruteforce-schedule \tdo brute force schedule\n" + " --no-bruteforce-schedule\n", + "mplcg", + {} }, + { kSimulateSched, + kEnable, + "", + "simulate-schedule", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --simulate-schedule \tdo simulate schedule\n" + " --no-simulate-schedule\n", + "mplcg", + {} }, { kLongCalls, kEnable, "", @@ -803,6 +939,32 @@ bool CGOptions::SolveOptions(const std::vector