diff --git a/src/mapleall/maple_be/include/be/riscv64/riscv64_rt.h b/src/mapleall/maple_be/include/be/riscv64/riscv64_rt.h
new file mode 100644
index 0000000000000000000000000000000000000000..06a51099568475a1d1fa37581c3a9208c156e730
--- /dev/null
+++ b/src/mapleall/maple_be/include/be/riscv64/riscv64_rt.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved.
+ *
+ * OpenArkCompiler is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR
+ * FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ */
+#ifndef MAPLEBE_INCLUDE_BE_AARCH64_AARCH64_RT_H
+#define MAPLEBE_INCLUDE_BE_AARCH64_AARCH64_RT_H
+
+#include 
+#include "rt.h"
+
+namespace maplebe {
+/*
+ * This class contains constants that describe the object (memory) layout at
+ * run time.
+ *
+ * WARNING: DO NOT USE `sizeof` OR `alignof`! This class runs on the host, but
+ * describes the target, which is usually different; therefore `sizeof` and
+ * `alignof` do not match the sizes and alignments on the target. In the
+ * MapleJava project, we run `mplcg` on x86_64, but run the application on
+ * AArch64.
+ */
+class AArch64RTSupport {
+ public:
+  static const uint64_t kObjectAlignment = 8; /* Word size. Suitable for all Java types. */
+  static const uint64_t kObjectHeaderSize = 8; /* java object header used by MM. */
+
+#ifdef USE_32BIT_REF
+  static const uint32_t kRefFieldSize = 4; /* reference field in java object */
+  static const uint32_t kRefFieldAlign = 4;
+#else
+  static const uint32_t kRefFieldSize = 8; /* reference field in java object */
+  static const uint32_t kRefFieldAlign = 8;
+#endif /* USE_32BIT_REF */
+  /* The array length offset is fixed since CONTENT_OFFSET is fixed to simplify code */
+  static const int64_t kArrayLengthOffset = 12; /* shadow + monitor + [padding] */
+  /* The array content offset is aligned to 8B to allow hosting of size-8B elements */
+  static const int64_t kArrayContentOffset = 16; /* fixed */
+  static const int64_t kGcTibOffset = -8;
+  static const int64_t kGcTibOffsetAbs = -kGcTibOffset;
+};
+} /* namespace maplebe */
+
+#endif /* MAPLEBE_INCLUDE_BE_AARCH64_AARCH64_RT_H */
\ No newline at end of file
diff --git a/src/mapleall/maple_be/include/cg/riscv64/mpl_atomic.h b/src/mapleall/maple_be/include/cg/riscv64/mpl_atomic.h
new file mode 100644
index 0000000000000000000000000000000000000000..f89f706597056731b81e893d8e4921cda6557d89
--- /dev/null
+++ b/src/mapleall/maple_be/include/cg/riscv64/mpl_atomic.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved.
+ *
+ * OpenArkCompiler is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR
+ * FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ */
+#ifndef MAPLEBE_INCLUDE_CG_AARCH64_MPL_ATOMIC_H
+#define MAPLEBE_INCLUDE_CG_AARCH64_MPL_ATOMIC_H
+
+#include 
+#include 
+#include "types_def.h"
+
+namespace maple {
+enum class MemOrd : uint32 {
+  kNotAtomic = 0,
+#define ATTR(STR) STR,
+#include "memory_order_attrs.def"
+#undef ATTR
+};
+
+MemOrd MemOrdFromU32(uint32 val);
+
+bool MemOrdIsAcquire(MemOrd ord);
+
+bool MemOrdIsRelease(MemOrd ord);
+} /* namespace maple */
+
+#endif /* MAPLEBE_INCLUDE_CG_AARCH64_MPL_ATOMIC_H */
\ No newline at end of file
diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_abi.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_abi.h
new file mode 100644
index 0000000000000000000000000000000000000000..b095ee925333c6e3fc0196ab6479030da29522fb
--- /dev/null
+++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_abi.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved.
+ *
+ * OpenArkCompiler is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR
+ * FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ */
+#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ABI_H
+#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ABI_H
+
+#include "riscv64_isa.h"
+#include "types_def.h"
+#include "becommon.h"
+
+namespace maplebe {
+using namespace maple;
+
+namespace AArch64Abi {
+constexpr int32 kNumIntParmRegs = 8;
+constexpr int32 kNumFloatParmRegs = 8;
+constexpr int32 kYieldPointReservedReg = 19;
+constexpr uint32 kNormalUseOperandNum = 3;
+
+constexpr AArch64reg intReturnRegs[kNumIntParmRegs] = { R0, R1, R2, R3, R4, R5, R6, R7 };
+constexpr AArch64reg floatReturnRegs[kNumFloatParmRegs] = { V0, V1, V2, V3, V4, V5, V6, V7 };
+constexpr AArch64reg intParmRegs[kNumIntParmRegs] = { R0, R1, R2, R3, R4, R5, R6, R7 };
+constexpr AArch64reg floatParmRegs[kNumFloatParmRegs] = { V0, V1, V2, V3, V4, V5, V6, V7 };
+
+/*
+ * Refer to ARM IHI 0055C_beta: Procedure Call Standard for
+ * ARM 64-bit Architecture. Section 5.5
+ */
+bool IsAvailableReg(AArch64reg reg);
+bool IsCalleeSavedReg(AArch64reg reg);
+bool IsParamReg(AArch64reg reg);
+bool IsSpillReg(AArch64reg reg);
+bool IsExtraSpillReg(AArch64reg reg);
+bool IsSpillRegInRA(AArch64reg regNO, bool has3RegOpnd);
+} /* namespace AArch64Abi */
+
+/*
+ * Refer to ARM IHI 0055C_beta: Procedure Call Standard for
+ * ARM 64-bit Architecture. Table 1.
+ */
+enum AArch64ArgumentClass : uint8 {
+  kAArch64NoClass,
+  kAArch64IntegerClass,
+  kAArch64FloatClass,
+  kAArch64ShortVectorClass,
+  kAArch64PointerClass,
+  kAArch64CompositeTypeHFAClass, /* Homogeneous Floating-point Aggregates */
+  kAArch64CompositeTypeHVAClass, /* Homogeneous Short-Vector Aggregates */
+  kAArch64MemoryClass
+};
+
+/* for specifying how a parameter is passed */
+struct PLocInfo {
+  AArch64reg reg0; /* 0 means parameter is stored on the stack */
+  AArch64reg reg1;
+  int32 memOffset;
+  int32 memSize;
+};
+
+/*
+ * We use the names used in ARM IHI 0055C_beta. $ 5.4.2.
+ * nextGeneralRegNO (= _int_parm_num) : Next General-purpose Register number + * nextFloatRegNO (= _float_parm_num): Next SIMD and Floating-point Register Number + * nextStackArgAdress (= _last_memOffset): Next Stacked Argument Address + * for processing an incoming or outgoing parameter list + */ +class ParmLocator { + public: + /* IHI 0055C_beta $ 5.4.2 Stage A (nextStackArgAdress is set to 0, meaning "relative to the current SP") */ + explicit ParmLocator(BECommon &be) : beCommon(be) {} + + ~ParmLocator() = default; + + /* Return size of aggregate structure copy on stack. */ + int32 LocateNextParm(MIRType &mirType, PLocInfo &pLoc, bool isFirst = false); + void InitPLocInfo(PLocInfo &pLoc) const; + + private: + BECommon &beCommon; + int32 paramNum = 0; /* number of all types of parameters processed so far */ + int32 nextGeneralRegNO = 0; /* number of integer parameters processed so far */ + int32 nextFloatRegNO = 0; /* number of float parameters processed so far */ + int32 nextStackArgAdress = 0; + + AArch64reg AllocateGPRegister() { + return (nextGeneralRegNO < AArch64Abi::kNumIntParmRegs) ? AArch64Abi::intParmRegs[nextGeneralRegNO++] : kRinvalid; + } + + void AllocateTwoGPRegisters(PLocInfo &pLoc) { + if ((nextGeneralRegNO + 1) < AArch64Abi::kNumIntParmRegs) { + pLoc.reg0 = AArch64Abi::intParmRegs[nextGeneralRegNO++]; + pLoc.reg1 = AArch64Abi::intParmRegs[nextGeneralRegNO++]; + } else { + pLoc.reg0 = kRinvalid; + } + } + + AArch64reg AllocateSIMDFPRegister() { + return (nextFloatRegNO < AArch64Abi::kNumFloatParmRegs) ? AArch64Abi::floatParmRegs[nextFloatRegNO++] : kRinvalid; + } + + void RoundNGRNUpToNextEven() { + nextGeneralRegNO = static_cast((nextGeneralRegNO + 1) & ~static_cast(1)); + } + + int32 ProcessPtyAggWhenLocateNextParm(MIRType &mirType, PLocInfo &pLoc, uint64 &typeSize, int32 typeAlign); +}; + +/* given the type of the return value, determines the return mechanism */ +class ReturnMechanism { + public: + ReturnMechanism(MIRType &retType, BECommon &be); + + ~ReturnMechanism() = default; + + uint8 GetRegCount() const { + return regCount; + } + + PrimType GetPrimTypeOfReg0() const { + return primTypeOfReg0; + } + + AArch64reg GetReg0() const { + return reg0; + } + + AArch64reg GetReg1() const { + return reg1; + } + + void SetupToReturnThroughMemory() { + regCount = 1; + reg0 = R8; + primTypeOfReg0 = PTY_u64; + } + + void SetupSecondRetReg(const MIRType &retTy2); + private: + uint8 regCount; /* number of registers <= 2 storing the return value */ + AArch64reg reg0; /* first register storing the return value */ + AArch64reg reg1; /* second register storing the return value */ + PrimType primTypeOfReg0; /* the primitive type stored in reg0 */ + PrimType primTypeOfReg1; /* the primitive type stored in reg1 */ +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ABI_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_args.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_args.h new file mode 100644 index 0000000000000000000000000000000000000000..0894f33bab66abd63556363299f08c9f38c90a8d --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_args.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ARGS_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ARGS_H + +#include "args.h" +#include "riscv64_cgfunc.h" + +namespace maplebe { +using namespace maple; + +struct ArgInfo { + AArch64reg reg; + MIRType *mirTy; + uint32 symSize; + uint32 stkSize; + RegType regType; + MIRSymbol *sym; + const AArch64SymbolAlloc *symLoc; +}; + +class AArch64MoveRegArgs : public MoveRegArgs { + public: + explicit AArch64MoveRegArgs(CGFunc &func) : MoveRegArgs(func) {} + ~AArch64MoveRegArgs() override = default; + void Run() override; + + private: + RegOperand *baseReg = nullptr; + const MemSegment *lastSegment = nullptr; + void CollectRegisterArgs(std::map &argsList, std::vector &indexList) const; + ArgInfo GetArgInfo(std::map &argsList, uint32 argIndex) const; + bool IsInSameSegment(const ArgInfo &firstArgInfo, const ArgInfo &secondArgInfo) const; + void GenerateStpInsn(const ArgInfo &firstArgInfo, const ArgInfo &secondArgInfo); + void GenerateStrInsn(ArgInfo &argInfo); + void MoveRegisterArgs(); + void MoveVRegisterArgs(); + void MoveLocalRefVarToRefLocals(MIRSymbol &mirSym); + void LoadStackArgsToVReg(MIRSymbol &mirSym); + void MoveArgsToVReg(const PLocInfo &ploc, MIRSymbol &mirSym); +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ARGS_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_cc.def b/src/mapleall/maple_be/include/cg/riscv64/riscv64_cc.def new file mode 100644 index 0000000000000000000000000000000000000000..dc574e324237433f2ace741d8e9977ee475f1974 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_cc.def @@ -0,0 +1,31 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +CONDCODE(EQ) /* equal */ +CONDCODE(NE) /* not equal */ +CONDCODE(CS) /* carry set (== HS) */ +CONDCODE(HS) /* unsigned higher or same (== CS) */ +CONDCODE(CC) /* carry clear (== LO) */ +CONDCODE(LO) /* Unsigned lower (== CC) */ +CONDCODE(MI) /* Minus or negative result */ +CONDCODE(PL) /* positive or zero result */ +CONDCODE(VS) /* overflow */ +CONDCODE(VC) /* no overflow */ +CONDCODE(HI) /* unsigned higher */ +CONDCODE(LS) /* unsigned lower or same */ +CONDCODE(GE) /* signed greater than or equal */ +CONDCODE(LT) /* signed less than */ +CONDCODE(GT) /* signed greater than */ +CONDCODE(LE) /* signed less than or equal */ +CONDCODE(AL) /* always, this is the default. usually omitted. 
*/ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_cg.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_cg.h new file mode 100644 index 0000000000000000000000000000000000000000..0679f81469aee4d14510929b8ef0b2961e02033f --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_cg.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_CG_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_CG_H + +#include "cg.h" +#include "riscv64_cgfunc.h" + +namespace maplebe { +constexpr int64 kShortBRDistance = (8 * 1024); +constexpr int64 kNegativeImmLowerLimit = -4096; +constexpr int32 kIntRegTypeNum = 5; + +/* Supporting classes for GCTIB merging */ +class GCTIBKey { + public: + GCTIBKey(MapleAllocator &allocator, uint32 rcHeader, std::vector &patternWords) + : header(rcHeader), bitMapWords(allocator.Adapter()) { + (void)bitMapWords.insert(bitMapWords.begin(), patternWords.begin(), patternWords.end()); + } + + ~GCTIBKey() = default; + + uint32 GetHeader() const { + return header; + } + + const MapleVector &GetBitmapWords() const { + return bitMapWords; + } + + private: + uint32 header; + MapleVector bitMapWords; +}; + +class Hasher { + public: + size_t operator()(const GCTIBKey *key) const { + CHECK_NULL_FATAL(key); + size_t hash = key->GetHeader(); + return hash; + } +}; + +class EqualFn { + public: + bool operator()(const GCTIBKey *firstKey, const GCTIBKey *secondKey) const { + CHECK_NULL_FATAL(firstKey); + CHECK_NULL_FATAL(secondKey); + const MapleVector &firstWords = firstKey->GetBitmapWords(); + const MapleVector &secondWords = secondKey->GetBitmapWords(); + + if ((firstKey->GetHeader() != secondKey->GetHeader()) || (firstWords.size() != secondWords.size())) { + return false; + } + + for (size_t i = 0; i < firstWords.size(); ++i) { + if (firstWords[i] != secondWords[i]) { + return false; + } + } + return true; + } +}; + +class GCTIBPattern { + public: + GCTIBPattern(GCTIBKey &patternKey, MemPool &mp) : name(&mp) { + key = &patternKey; + id = GetId(); + name = GCTIB_PREFIX_STR + std::string("PTN_") + std::to_string(id); + } + + ~GCTIBPattern() = default; + + int GetId() { + static int id = 0; + return id++; + } + + std::string GetName() const { + ASSERT(!name.empty(), "null name check!"); + return std::string(name.c_str()); + } + + void SetName(const std::string &ptnName) { + name = ptnName; + } + + private: + int id; + MapleString name; + GCTIBKey *key; +}; + +class AArch64CG : public CG { + public: + AArch64CG(MIRModule &mod, const CGOptions &opts, const std::vector &nameVec, + const std::unordered_map> &patternMap) + : CG(mod, opts), + ehExclusiveNameVec(nameVec), + cyclePatternMap(patternMap), + keyPatternMap(allocator.Adapter()), + symbolPatternMap(allocator.Adapter()) {} + + ~AArch64CG() override = default; + + CGFunc *CreateCGFunc(MIRModule &mod, MIRFunction &mirFunc, BECommon &bec, MemPool &memPool, + MapleAllocator &mallocator, uint32 funcId) override { + return memPool.New(mod, *this, 
mirFunc, bec, memPool, mallocator, funcId); + } + + const std::unordered_map> &GetCyclePatternMap() const { + return cyclePatternMap; + } + + void GenerateObjectMaps(BECommon &beCommon) override; + + bool IsExclusiveFunc(MIRFunction&) override; + + void FindOrCreateRepresentiveSym(std::vector &bitmapWords, uint32 rcHeader, const std::string &name); + + void CreateRefSymForGlobalPtn(GCTIBPattern &ptn); + + std::string FindGCTIBPatternName(const std::string &name) const override; + + static const AArch64MD kMd[kMopLast]; + enum : uint8 { + kR8List, + kR16List, + kR32List, + kR64List, + kV64List + }; + static std::array, kIntRegTypeNum> intRegNames; + + private: + const std::vector &ehExclusiveNameVec; + const std::unordered_map> &cyclePatternMap; + MapleUnorderedMap keyPatternMap; + MapleUnorderedMap symbolPatternMap; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_CG_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_cgfunc.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_cgfunc.h new file mode 100644 index 0000000000000000000000000000000000000000..1726f790d6798e1ab093c4c18aba2bf9951d3752 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_cgfunc.h @@ -0,0 +1,650 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_CGFUNC_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_CGFUNC_H + +#include "cgfunc.h" +#include "mpl_atomic.h" +#include "riscv64_abi.h" +#include "riscv64_operand.h" +#include "riscv64_insn.h" +#include "riscv64_memlayout.h" +#include "riscv64_optimize_common.h" + +namespace maplebe { +class AArch64CGFunc : public CGFunc { + public: + AArch64CGFunc(MIRModule &mod, CG &c, MIRFunction &f, BECommon &b, + MemPool &memPool, MapleAllocator &mallocator, uint32 funcId) + : CGFunc(mod, c, f, b, memPool, mallocator, funcId), + calleeSavedRegs(mallocator.Adapter()), + formalRegList(mallocator.Adapter()), + phyRegOperandTable(std::less(), mallocator.Adapter()), + hashLabelOpndTable(mallocator.Adapter()), + hashOfstOpndTable(std::less(), mallocator.Adapter()), + hashMemOpndTable(std::less(), mallocator.Adapter()), + memOpndsRequiringOffsetAdjustment(std::less(), mallocator.Adapter()), + memOpndsForStkPassedArguments(std::less(), mallocator.Adapter()), + immOpndsRequiringOffsetAdjustment(mallocator.Adapter()), + immOpndsRequiringOffsetAdjustmentForRefloc(mallocator.Adapter()) { + uCatch.regNOCatch = 0; + CGFunc::SetMemlayout(*memPool.New(b, f, mallocator)); + CGFunc::GetMemlayout()->SetCurrFunction(*this); + } + + ~AArch64CGFunc() override = default; + + const MapleVector &GetFormalRegList() const { + return formalRegList; + } + + void PushElemIntoFormalRegList(AArch64reg reg) { + formalRegList.emplace_back(reg); + } + + uint32 GetRefCount() const { + return refCount; + } + + int32 GetBeginOffset() const { + return beginOffset; + } + + MOperator PickMovInsn(PrimType primType); + MOperator PickMovInsn(RegOperand &lhs, RegOperand &rhs); + + regno_t NewVRflag() override { + ASSERT(maxRegCount > kRFLAG, "CG internal error."); + constexpr uint8 size = 4; + if (maxRegCount <= kRFLAG) { + maxRegCount += (kRFLAG + kVRegisterNumber); + vRegTable.resize(maxRegCount); + } + new (&vRegTable[kRFLAG]) VirtualRegNode(kRegTyCc, size); + return kRFLAG; + } + + void IntrinsifyGetAndAddInt(AArch64ListOperand &srcOpnds, PrimType pty); + void IntrinsifyGetAndSetInt(AArch64ListOperand &srcOpnds, PrimType pty); + void IntrinsifyCompareAndSwapInt(AArch64ListOperand &srcOpnds, PrimType pty); + void IntrinsifyStringIndexOf(AArch64ListOperand &srcOpnds, const MIRSymbol &funcSym); + MOperator PickMovInsn(uint32 bitLen, RegType regType); + void GenSaveMethodInfoCode(BB &bb) override; + void DetermineReturnTypeofCall() override; + void HandleRCCall(bool begin, const MIRSymbol *retRef = nullptr) override; + bool GenRetCleanup(const IntrinsiccallNode *cleanupNode, bool forEA = false); + void HandleRetCleanup(NaryStmtNode &retNode) override; + void MergeReturn() override; + RegOperand *ExtractNewMemBase(MemOperand &memOpnd); + void SelectDassign(DassignNode &stmt, Operand &opnd0) override; + void SelectRegassign(RegassignNode &stmt, Operand &opnd0) override; + void SelectAssertNull(UnaryStmtNode &stmt) override; + void SelectAggDassign(DassignNode &stmt) override; + void SelectIassign(IassignNode &stmt) override; + void SelectAggIassign(IassignNode &stmt, Operand &lhsAddrOpnd) override; + void SelectReturn(Operand *opnd0) override; + void SelectIgoto(Operand *opnd0) override; + void SelectCondGoto(CondGotoNode &stmt, Operand &opnd0, Operand &opnd1) override; + void SelectCondGoto(LabelOperand &targetOpnd, Opcode jmpOp, Opcode cmpOp, Operand &opnd0, Operand &opnd1, + PrimType primType); + void SelectCondSpecialCase1(CondGotoNode &stmt, BaseNode &opnd0) override; + void 
SelectCondSpecialCase2(const CondGotoNode &stmt, BaseNode &opnd0) override; + void SelectGoto(GotoNode &stmt) override; + void SelectCall(CallNode &callNode) override; + void SelectIcall(IcallNode &icallNode, Operand &fptrOpnd) override; + void SelectIntrinCall(IntrinsiccallNode &intrinsicCallNode) override; + void SelectMembar(StmtNode &membar) override; + void SelectComment(CommentNode &comment) override; + + void HandleCatch() override; + Operand *SelectDread(const BaseNode &parent, AddrofNode &expr) override; + RegOperand *SelectRegread(RegreadNode &expr) override; + + void SelectAddrof(Operand &result, StImmOperand &stImm); + void SelectAddrof(Operand &result, AArch64MemOperand &memOpnd); + Operand *SelectAddrof(AddrofNode &expr) override; + Operand &SelectAddrofFunc(AddroffuncNode &expr) override; + Operand &SelectAddrofLabel(AddroflabelNode &expr) override; + + PrimType GetDestTypeFromAggSize(uint32 bitSize) const; + + Operand *SelectIread(const BaseNode &parent, IreadNode &expr) override; + + Operand *SelectIntConst(MIRIntConst &intConst) override; + Operand *SelectFloatConst(MIRFloatConst &floatConst) override; + Operand *SelectDoubleConst(MIRDoubleConst &doubleConst) override; + Operand *SelectStrConst(MIRStrConst &strConst) override; + Operand *SelectStr16Const(MIRStr16Const &str16Const) override; + + void SelectAdd(Operand &resOpnd, Operand &o0, Operand &o1, PrimType primType) override; + Operand *SelectAdd(BinaryNode &node, Operand &o0, Operand &o1) override; + Operand &SelectCGArrayElemAdd(BinaryNode &node) override; + Operand *SelectShift(BinaryNode &node, Operand &o0, Operand &o1) override; + Operand *SelectSub(BinaryNode &node, Operand &o0, Operand &o1) override; + void SelectSub(Operand &resOpnd, Operand &o0, Operand &o1, PrimType primType) override; + Operand *SelectBand(BinaryNode &node, Operand &o0, Operand &o1) override; + void SelectBand(Operand &resOpnd, Operand &o0, Operand &o1, PrimType primType) override; + Operand *SelectBior(BinaryNode &node, Operand &o0, Operand &o1) override; + void SelectBior(Operand &resOpnd, Operand &o0, Operand &o1, PrimType primType) override; + Operand *SelectBxor(BinaryNode &node, Operand &o0, Operand &o1) override; + void SelectBxor(Operand &resOpnd, Operand &o0, Operand &o1, PrimType primType) override; + + void SelectBxorShift(Operand &resOpnd, Operand *o0, Operand *o1, Operand &o2, PrimType primType); + Operand *SelectLand(BinaryNode &node, Operand &o0, Operand &o1) override; + Operand *SelectLor(BinaryNode &node, Operand &o0, Operand &o1, bool parentIsBr = false) override; + Operand *SelectMin(BinaryNode &node, Operand &o0, Operand &o1) override; + void SelectMin(Operand &resOpnd, Operand &o0, Operand &o1, PrimType primType) override; + Operand *SelectMax(BinaryNode &node, Operand &o0, Operand &o1) override; + void SelectMax(Operand &resOpnd, Operand &o0, Operand &o1, PrimType primType) override; + void SelectFMinFMax(Operand &resOpnd, Operand &o0, Operand &o1, bool is64Bits, bool isMin); + void SelectCmpOp(Operand &resOpnd, Operand &o0, Operand &o1, Opcode opCode, PrimType primType); + + Operand *SelectCmpOp(CompareNode &node, Operand &o0, Operand &o1) override; + + void SelectAArch64Cmp(Operand &o, Operand &i, bool isIntType, uint32 dsize); + void SelectTargetFPCmpQuiet(Operand &o0, Operand &o1, uint32 dsize); + void SelectAArch64CCmp(Operand &o, Operand &i, Operand &nzcv, CondOperand &cond, bool is64Bits); + void SelectAArch64CSet(Operand &o, CondOperand &cond, bool is64Bits); + void SelectAArch64CSINV(Operand &res, Operand 
&o0, Operand &o1, CondOperand &cond, bool is64Bits); + void SelectAArch64CSINC(Operand &res, Operand &o0, Operand &o1, CondOperand &cond, bool is64Bits); + void SelectShift(Operand &resOpnd, Operand &o0, Operand &o1, ShiftDirection direct, PrimType primType); + Operand *SelectMpy(BinaryNode &node, Operand &o0, Operand &o1) override; + void SelectMpy(Operand &resOpnd, Operand &o0, Operand &o1, PrimType primType) override; + /* method description contains method information which is metadata for reflection. */ + MemOperand *AdjustMemOperandIfOffsetOutOfRange(MemOperand *memOpnd, regno_t regNO, bool isDest, Insn &insn, + AArch64reg regNum, bool &isOutOfRange); + void SelectAddAfterInsn(Operand &resOpnd, Operand &o0, Operand &o1, PrimType primType, bool isDest, Insn &insn); + bool IsImmediateOffsetOutOfRange(AArch64MemOperand &memOpnd, uint32 bitLen); + Operand *SelectRem(BinaryNode &node, Operand &opnd0, Operand &opnd1) override; + void SelectDiv(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) override; + Operand *SelectDiv(BinaryNode &node, Operand &opnd0, Operand &opnd1) override; + Operand *SelectAbs(UnaryNode &node, Operand &opnd0) override; + Operand *SelectBnot(UnaryNode &node, Operand &opnd0) override; + Operand *SelectExtractbits(ExtractbitsNode &node, Operand &opnd0) override; + Operand *SelectDepositBits(DepositbitsNode &node, Operand &opnd0, Operand &opnd1) override; + void SelectDepositBits(Operand &resOpnd, Operand &opnd0, Operand &opnd1, uint32 bitOffset, uint32 bitSize, + PrimType regType) override; + Operand *SelectLnot(UnaryNode &node, Operand &opnd0) override; + Operand *SelectNeg(UnaryNode &node, Operand &opnd0) override; + void SelectNeg(Operand &dest, Operand &opnd0, PrimType primType); + void SelectMvn(Operand &dest, Operand &opnd0, PrimType primType); + Operand *SelectRecip(UnaryNode &node, Operand &opnd0) override; + Operand *SelectSqrt(UnaryNode &node, Operand &opnd0) override; + Operand *SelectCeil(TypeCvtNode &node, Operand &opnd0) override; + Operand *SelectFloor(TypeCvtNode &node, Operand &opnd0) override; + Operand *SelectRetype(TypeCvtNode &node, Operand &opnd0) override; + Operand *SelectRound(TypeCvtNode &node, Operand &opnd0) override; + Operand *SelectCvt(const BaseNode &parent, TypeCvtNode &node, Operand &opnd0) override; + Operand *SelectTrunc(TypeCvtNode &node, Operand &opnd0) override; + Operand *SelectSelect(TernaryNode &node, Operand &opnd0, Operand &opnd1, Operand &opnd2) override; + Operand *SelectMalloc(UnaryNode &call, Operand &opnd0) override; + Operand *SelectAlloca(UnaryNode &call, Operand &opnd0) override; + Operand *SelectGCMalloc(GCMallocNode &call) override; + Operand *SelectJarrayMalloc(JarrayMallocNode &call, Operand &opnd0) override; + void SelectSelect(Operand &resOpnd, Operand &condOpnd, Operand &trueOpnd, Operand &falseOpnd, PrimType dtype, + PrimType ctype); + void SelectAArch64Select(Operand &dest, Operand &opnd0, Operand &opnd1, CondOperand &cond, bool isIntType, + uint32 is64bits); + void SelectRangeGoto(RangeGotoNode &rangeGotoNode, Operand &opnd0) override; + Operand *SelectLazyLoad(Operand &opnd0, PrimType primType) override; + Operand *SelectLazyLoadStatic(MIRSymbol &st, int64 offset, PrimType primType) override; + Operand *SelectLoadArrayClassCache(MIRSymbol &st, int64 offset, PrimType primType) override; + RegOperand &SelectCopy(Operand &src, PrimType stype, PrimType dtype) override; + void SelectCopy(Operand &dest, PrimType dtype, Operand &src, PrimType stype); + void SelectCopyImm(Operand &dest, 
ImmOperand &src, PrimType dtype); + void SelectLibCall(const std::string&, std::vector&, PrimType, PrimType, bool is2ndRet = false); + Operand &GetTargetRetOperand(PrimType primType, int32 sReg) override; + Operand &GetOrCreateRflag() override; + const Operand *GetRflag() const override; + Operand &GetOrCreatevaryreg(); + RegOperand &CreateRegisterOperandOfType(PrimType primType); + RegOperand &CreateRegisterOperandOfType(RegType regType, uint32 byteLen); + RegOperand &CreateRflagOperand(); + RegOperand &GetOrCreateSpecialRegisterOperand(PregIdx sregIdx, PrimType primType = PTY_i64); + MemOperand *GetOrCreatSpillMem(regno_t vrNum); + void FreeSpillRegMem(regno_t vrNum); + AArch64RegOperand &GetOrCreatePhysicalRegisterOperand(AArch64reg regNO, uint32 size, RegType type, uint32 flag = 0); + RegOperand &CreateVirtualRegisterOperand(regno_t vregNO) override; + RegOperand &GetOrCreateVirtualRegisterOperand(regno_t vregNO) override; + const LabelOperand *GetLabelOperand(LabelIdx labIdx) const override; + LabelOperand &GetOrCreateLabelOperand(LabelIdx labIdx) override; + LabelOperand &GetOrCreateLabelOperand(BB &bb) override; + LabelOperand &CreateFuncLabelOperand(const MIRSymbol &func); + + AArch64ImmOperand &CreateImmOperand(PrimType ptyp, int64 val) override { + return CreateImmOperand(val, GetPrimTypeBitSize(ptyp), IsSignedInteger(ptyp)); + } + + Operand *CreateZeroOperand(PrimType ptyp) override { + (void)ptyp; + ASSERT(false, "NYI"); + return nullptr; + } + + Operand &CreateFPImmZero(PrimType ptyp) override { + return GetOrCreateFpZeroOperand(GetPrimTypeBitSize(ptyp)); + } + + const Operand *GetFloatRflag() const override { + return nullptr; + } + /* create an integer immediate operand */ + AArch64ImmOperand &CreateImmOperand(int64 val, uint32 size, bool isSigned, VaryType varyType = kNotVary, + bool isFmov = false) { + return *memPool->New(val, size, isSigned, varyType, isFmov); + } + + ImmFPZeroOperand &GetOrCreateFpZeroOperand(uint8 size) { + return *ImmFPZeroOperand::allocate(size); + } + + AArch64OfstOperand &GetOrCreateOfstOpnd(uint32 offset, uint32 size); + + AArch64OfstOperand &CreateOfstOpnd(uint32 offset, uint32 size) { + return *memPool->New(offset, size); + } + + AArch64OfstOperand &CreateOfstOpnd(const MIRSymbol &mirSymbol, int32 relocs) { + return *memPool->New(mirSymbol, 0, relocs); + } + + AArch64OfstOperand &CreateOfstOpnd(const MIRSymbol &mirSymbol, int64 offset, int32 relocs) { + return *memPool->New(mirSymbol, 0, offset, relocs); + } + + StImmOperand &CreateStImmOperand(const MIRSymbol &mirSymbol, int64 offset, int32 relocs) { + return *memPool->New(mirSymbol, offset, relocs); + } + + RegOperand &GetOrCreateFramePointerRegOperand() override { + return GetOrCreateStackBaseRegOperand(); + } + + RegOperand &GetOrCreateStackBaseRegOperand() override { + return GetOrCreatePhysicalRegisterOperand(RFP, kSizeOfPtr * kBitsPerByte, kRegTyInt); + } + + RegOperand &GenStructParamIndex(RegOperand &base, const BaseNode &indexExpr, int shift); + + MemOperand &GetOrCreateMemOpnd(const MIRSymbol &symbol, int32 offset, uint32 size, bool forLocalRef = false); + + AArch64MemOperand &GetOrCreateMemOpnd(AArch64MemOperand::AArch64AddressingMode, uint32, RegOperand*, RegOperand*, + OfstOperand*, const MIRSymbol*); + + AArch64MemOperand &GetOrCreateMemOpnd(AArch64MemOperand::AArch64AddressingMode, uint32 size, RegOperand *base, + RegOperand *index, int32 shift, bool isSigned = false); + + MemOperand &CreateMemOpnd(AArch64reg reg, int32 offset, uint32 size) { + AArch64RegOperand &baseOpnd = 
GetOrCreatePhysicalRegisterOperand(reg, kSizeOfPtr * kBitsPerByte, kRegTyInt); + return CreateMemOpnd(baseOpnd, offset, size); + } + + MemOperand &CreateMemOpnd(RegOperand &baseOpnd, int32 offset, uint32 size); + + MemOperand &CreateMemOpnd(RegOperand &baseOpnd, int32 offset, uint32 size, const MIRSymbol &sym); + + MemOperand &CreateMemOpnd(PrimType ptype, const BaseNode &parent, BaseNode &addrExpr, int32 offset = 0, + AArch64isa::MemoryOrdering memOrd = AArch64isa::kMoNone); + + CondOperand &GetCondOperand(AArch64CC_t op) { + return ccOperands[op]; + } + + LogicalShiftLeftOperand *GetLogicalShiftLeftOperand(uint32 shiftAmount, bool is64bits) { + /* num(0, 16, 32, 48) >> 4 is num1(0, 1, 2, 3), num1 & (~3) == 0 */ + ASSERT((!shiftAmount || ((shiftAmount >> 4) & ~static_cast(3)) == 0), + "shift amount should be one of 0, 16, 32, 48"); + /* movkLslOperands[4]~movkLslOperands[7] is for 64 bits */ + return &movkLslOperands[(shiftAmount >> 4) + (is64bits ? 4 : 0)]; + } + + BitShiftOperand &CreateBitShiftOperand(BitShiftOperand::ShiftOp op, uint32 amount, int32 bitLen) { + return *memPool->New(op, amount, bitLen); + } + + ExtendShiftOperand &CreateExtendShiftOperand(ExtendShiftOperand::ExtendOp op, uint32 amount, int32 bitLen) { + return *memPool->New(op, amount, bitLen); + } + + void SplitMovImmOpndInstruction(int64 immVal, RegOperand &destReg); + + Operand &GetOrCreateFuncNameOpnd(const MIRSymbol &symbol); + void GenerateYieldpoint(BB &bb) override; + Operand &ProcessReturnReg(PrimType primType, int32 sReg) override; + void GenerateCleanupCode(BB &bb) override; + bool NeedCleanup() override; + void GenerateCleanupCodeForExtEpilog(BB &bb) override; + Operand *GetBaseReg(const AArch64SymbolAlloc &symAlloc); + int32 GetBaseOffset(const SymbolAlloc &symAlloc) override; + + Operand &CreateCommentOperand(const std::string &s) { + return *memPool->New(s, *memPool); + } + + Operand &CreateCommentOperand(const MapleString &s) { + return *memPool->New(s.c_str(), *memPool); + } + + void AddtoCalleeSaved(AArch64reg reg) { + if (find(calleeSavedRegs.begin(), calleeSavedRegs.end(), reg) != calleeSavedRegs.end()) { + return; + } + calleeSavedRegs.emplace_back(reg); + ASSERT((AArch64isa::IsGPRegister(reg) || AArch64isa::IsFPSIMDRegister(reg)), "Int or FP registers are expected"); + if (AArch64isa::IsGPRegister(reg)) { + ++numIntregToCalleeSave; + } else { + ++numFpregToCalleeSave; + } + } + + int32 SizeOfCalleeSaved() { + /* npairs = num / 2 + num % 2 */ + int32 nPairs = (numIntregToCalleeSave >> 1) + (numIntregToCalleeSave & 0x1); + nPairs += (numFpregToCalleeSave >> 1) + (numFpregToCalleeSave & 0x1); + return(nPairs * (kIntregBytelen << 1)); + } + + void NoteFPLRAddedToCalleeSavedList() { + fplrAddedToCalleeSaved = true; + } + + bool IsFPLRAddedToCalleeSavedList() { + return fplrAddedToCalleeSaved; + } + + bool UsedStpSubPairForCallFrameAllocation() { + return usedStpSubPairToAllocateCallFrame; + } + void SetUsedStpSubPairForCallFrameAllocation(bool val) { + usedStpSubPairToAllocateCallFrame = val; + } + + const MapleVector &GetCalleeSavedRegs() const { + return calleeSavedRegs; + } + + Insn *GetYieldPointInsn() { + return yieldPointInsn; + } + + const Insn *GetYieldPointInsn() const { + return yieldPointInsn; + } + + IntrinsiccallNode *GetCleanEANode() { + return cleanEANode; + } + + AArch64MemOperand &CreateStkTopOpnd(int32 offset, int32 size); + + /* if offset < 0, allocation; otherwise, deallocation */ + AArch64MemOperand &CreateCallFrameOperand(int32 offset, int32 size); + + void AppendCall(const 
MIRSymbol &func); + Insn &AppendCall(const MIRSymbol &func, AArch64ListOperand &srcOpnds, bool isCleanCall = false); + + static constexpr uint32 kDwarfScalarRegBegin = 0; + static constexpr uint32 kDwarfFpRegBegin = 64; + static constexpr int32 kBitLenOfShift64Bits = 6; /* for 64 bits register, shift amount is 0~63, use 6 bits to store */ + static constexpr int32 kBitLenOfShift32Bits = 5; /* for 32 bits register, shift amount is 0~31, use 5 bits to store */ + static constexpr int32 kHighestBitOf64Bits = 63; /* 63 is highest bit of a 64 bits number */ + static constexpr int32 kHighestBitOf32Bits = 31; /* 31 is highest bit of a 32 bits number */ + static constexpr int32 k16ValidBit = 16; + + /* CFI directives related stuffs */ + Operand &CreateCfiRegOperand(uint32 reg, uint32 size) override { + /* + * DWARF for ARM Architecture (ARM IHI 0040B) 3.1 Table 1 + * Having kRinvalid=0 (see arm32_isa.h) means + * each register gets assigned an id number one greater than + * its physical number + */ + if (reg < V0) { + return *memPool->New(reg - R0 + kDwarfScalarRegBegin, size); + } else { + return *memPool->New(reg - V0 + kDwarfFpRegBegin, size); + } + } + + void SetCatchRegno(regno_t regNO) { + uCatch.regNOCatch = regNO; + } + + regno_t GetCatchRegno() { + return uCatch.regNOCatch; + } + + void SetCatchOpnd(Operand &opnd) { + uCatch.opndCatch = &opnd; + } + + AArch64reg GetReturnRegisterNumber(); + + MOperator PickStInsn(uint32 bitSize, PrimType primType, AArch64isa::MemoryOrdering memOrd = AArch64isa::kMoNone); + MOperator PickLdInsn(uint32 bitSize, PrimType primType, AArch64isa::MemoryOrdering memOrd = AArch64isa::kMoNone); + + bool CheckIfSplitOffsetWithAdd(const AArch64MemOperand &memOpnd, uint32 bitLen); + AArch64MemOperand &SplitOffsetWithAddInstruction(const AArch64MemOperand &memOpnd, uint32 bitLen, + AArch64reg baseRegNum = AArch64reg::kRinvalid, bool isDest = false, + Insn *insn = nullptr); + AArch64MemOperand &CreateReplacementMemOperand(uint32 bitLen, RegOperand &baseReg, int32 offset); + + bool HasStackLoadStore(); + + MemOperand &LoadStructCopyBase(const MIRSymbol &symbol, int32 offset, int datasize); + + int32 GetSplitBaseOffset() const { + return splitStpldpBaseOffset; + } + void SetSplitBaseOffset(int32 val) { + splitStpldpBaseOffset = val; + } + + Insn &CreateCommentInsn(const std::string &comment) { + return cg->BuildInstruction(MOP_comment, CreateCommentOperand(comment)); + } + + Insn &CreateCommentInsn(const MapleString &comment) { + return cg->BuildInstruction(MOP_comment, CreateCommentOperand(comment)); + } + + Insn &CreateCfiRestoreInsn(uint32 reg, uint32 size) { + return cg->BuildInstruction(cfi::OP_CFI_restore, CreateCfiRegOperand(reg, size)); + } + + Insn &CreateCfiOffsetInsn(uint32 reg, int64 val, uint32 size) { + return cg->BuildInstruction(cfi::OP_CFI_offset, CreateCfiRegOperand(reg, size), + CreateCfiImmOperand(val, size)); + } + Insn &CreateCfiDefCfaInsn(uint32 reg, int64 val, uint32 size) { + return cg->BuildInstruction(cfi::OP_CFI_def_cfa, CreateCfiRegOperand(reg, size), + CreateCfiImmOperand(val, size)); + } + + InsnVisitor *NewInsnModifier() override { + return memPool->New(*this); + } + + RegType GetRegisterType(regno_t reg) const override; + + private: + enum RelationOperator : uint8 { + kAND, + kIOR, + kEOR + }; + + enum RelationOperatorOpndPattern : uint8 { + kRegReg, + kRegImm + }; + + enum RoundType : uint8 { + kCeil, + kFloor, + kRound + }; + + static constexpr int32 kMaxMovkLslEntries = 8; + using MovkLslOperandArray = std::array; + + MapleVector 
calleeSavedRegs; + MapleVector formalRegList; /* store the parameters register used by this function */ + uint32 refCount = 0; /* Ref count number. 0 if function don't have "bl MCC_InitializeLocalStackRef" */ + int32 beginOffset = 0; /* Begin offset based x29. */ + Insn *yieldPointInsn = nullptr; /* The insn of yield point at the entry of the func. */ + IntrinsiccallNode *cleanEANode = nullptr; + + MapleMap phyRegOperandTable; /* machine register operand table */ + MapleUnorderedMap hashLabelOpndTable; + MapleMap hashOfstOpndTable; + MapleMap hashMemOpndTable; + /* + * Local variables, formal parameters that are passed via registers + * need offset adjustment after callee-saved registers are known. + */ + MapleMap memOpndsRequiringOffsetAdjustment; + MapleMap memOpndsForStkPassedArguments; + MapleUnorderedMap immOpndsRequiringOffsetAdjustment; + MapleUnorderedMap immOpndsRequiringOffsetAdjustmentForRefloc; + union { + regno_t regNOCatch; /* For O2. */ + Operand *opndCatch; /* For O0-O1. */ + } uCatch; + Operand *rcc = nullptr; + Operand *vary = nullptr; + Operand *fsp = nullptr; /* used to point the address of local variables and formal parameters */ + + static CondOperand ccOperands[kCcLast]; + static MovkLslOperandArray movkLslOperands; + static LogicalShiftLeftOperand addSubLslOperand; + uint32 numIntregToCalleeSave = 0; + uint32 numFpregToCalleeSave = 0; + bool fplrAddedToCalleeSaved = false; + bool usedStpSubPairToAllocateCallFrame = false; + int32 splitStpldpBaseOffset = 0; + regno_t methodHandleVreg = -1; + + void SelectLoadAcquire(Operand &dest, PrimType dtype, Operand &src, PrimType stype, + AArch64isa::MemoryOrdering memOrd, bool isDirect); + void SelectStoreRelease(Operand &dest, PrimType dtype, Operand &src, PrimType stype, + AArch64isa::MemoryOrdering memOrd, bool isDirect); + MOperator PickJmpInsn(Opcode brOp, Opcode cmpOp, bool isFloat, bool isSigned); + Operand &GetZeroOpnd(uint32 size) override; + bool IsFrameReg(const RegOperand &opnd) const override; + + PrimType GetOperandType(bool isIntty, uint32 dsize, bool isSigned) { + ASSERT(!isSigned || isIntty, ""); + return (isIntty ? ((dsize == k64BitSize) ? (isSigned ? PTY_i64 : PTY_u64) : (isSigned ? PTY_i32 : PTY_u32)) + : ((dsize == k64BitSize) ? PTY_f64 : PTY_f32)); + } + + RegOperand &LoadIntoRegister(Operand &o, bool isIntty, uint32 dsize, bool asSigned = false) { + return LoadIntoRegister(o, GetOperandType(isIntty, dsize, asSigned)); + } + + RegOperand &LoadIntoRegister(Operand &o, PrimType oty) { + return (o.IsRegister() ? 
static_cast(o) : SelectCopy(o, oty, oty)); + } + + void CreateCallStructParamPassByStack(int32 symSize, MIRSymbol *sym, RegOperand *addrOpnd, int32 baseOffset); + void CreateCallStructParamPassByReg(AArch64reg reg, MemOperand &memOpnd, AArch64ListOperand &srcOpnds); + void CreateCallStructParamMemcpy(const MIRSymbol *sym, RegOperand *addropnd, + uint32 structSize, int32 copyOffset, int32 fromOffset); + AArch64RegOperand *CreateCallStructParamCopyToStack(uint32 numMemOp, MIRSymbol *sym, RegOperand *addropnd, + int32 copyOffset, AArch64reg reg); + void SelectParmListDreadSmallAggregate(MIRSymbol &sym, MIRType &structType, AArch64ListOperand &srcOpnds, + ParmLocator &parmLocator); + void SelectParmListIreadSmallAggregate(const IreadNode &iread, MIRType &structType, AArch64ListOperand &srcOpnds, + ParmLocator &parmLocator); + void SelectParmListDreadLargeAggregate(MIRSymbol &sym, MIRType &structType, AArch64ListOperand &srcOpnds, + ParmLocator &parmLocator, int32 &structCopyOffset); + void SelectParmListIreadLargeAggregate(const IreadNode &iread, MIRType &structType, AArch64ListOperand &srcOpnds, + ParmLocator &parmLocator, int32 &structCopyOffset); + void CreateCallStructMemcpyToParamReg(MIRType &structType, int32 structCopyOffset, ParmLocator &parmLocator, + AArch64ListOperand &srcOpnds); + void SelectParmListForAggregate(BaseNode &argExpr, AArch64ListOperand &srcOpnds, ParmLocator &parmLocator, + int32 &structCopyOffset); + uint32 SelectParmListGetStructReturnSize(StmtNode &naryNode); + void SelectParmList(StmtNode &naryNode, AArch64ListOperand &srcOpnds, bool isCallNative = false); + Operand *SelectClearStackCallParam(const AddrofNode &expr, int64 &offsetValue); + void SelectClearStackCallParmList(const StmtNode &naryNode, AArch64ListOperand &srcOpnds, + std::vector &stackPostion); + void SelectRem(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType, bool isSigned, bool is64Bits); + void SelectCvtInt2Int(const BaseNode *parent, Operand *&resOpnd, Operand *opnd0, PrimType fromType, PrimType toType); + void SelectCvtFloat2Float(Operand &resOpnd, Operand &opnd0, PrimType fromType, PrimType toType); + void SelectCvtFloat2Int(Operand &resOpnd, Operand &opnd0, PrimType itype, PrimType ftype); + void SelectCvtInt2Float(Operand &resOpnd, Operand &opnd0, PrimType toType, PrimType fromType); + Operand *SelectRelationOperator(RelationOperator operatorCode, const BinaryNode &node, Operand &opnd0, + Operand &opnd1); + void SelectRelationOperator(RelationOperator operatorCode, Operand &resOpnd, Operand &opnd0, Operand &opnd1, + PrimType primType); + MOperator SelectRelationMop(RelationOperator operatorType, RelationOperatorOpndPattern opndPattern, + bool is64Bits, bool IsBitmaskImmediate, bool isBitNumLessThan16) const; + Operand *SelectMinOrMax(bool isMin, const BinaryNode &node, Operand &opnd0, Operand &opnd1); + void SelectMinOrMax(bool isMin, Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType); + Operand *SelectRoundLibCall(RoundType roundType, const TypeCvtNode &node, Operand &opnd0); + Operand *SelectRoundOperator(RoundType roundType, const TypeCvtNode &node, Operand &opnd0); + int64 GetOrCreatSpillRegLocation(regno_t vrNum) { + AArch64SymbolAlloc *symLoc = static_cast(GetMemlayout()->GetLocOfSpillRegister(vrNum)); + return static_cast(GetBaseOffset(*symLoc)); + } + + void SelectCopyMemOpnd(Operand &dest, PrimType dtype, uint32 dsize, Operand &src, PrimType stype); + void SelectCopyRegOpnd(Operand &dest, PrimType dtype, Operand::OperandType opndType, uint32 dsize, 
Operand &src, + PrimType stype); + bool GenerateCompareWithZeroInstruction(Opcode jmpOp, Opcode cmpOp, bool is64Bits, + LabelOperand &targetOpnd, Operand &opnd0); + void GenCVaStartIntrin(RegOperand &opnd, uint32 stkSize); + void SelectCVaStart(const IntrinsiccallNode &intrnNode); + void SelectMPLClinitCheck(IntrinsiccallNode&); + void SelectMPLProfCounterInc(IntrinsiccallNode &intrnNode); + /* Helper functions for translating complex Maple IR instructions/inrinsics */ + void SelectDassign(StIdx stIdx, FieldID fieldId, PrimType rhsPType, Operand &opnd0); + LabelIdx CreateLabeledBB(StmtNode &stmt); + void SaveReturnValueInLocal(CallReturnVector &retVals, size_t index, PrimType primType, Operand &value, + StmtNode &parentStmt); + /* Translation for load-link store-conditional, and atomic RMW operations. */ + MemOrd OperandToMemOrd(Operand &opnd); + MOperator PickLoadStoreExclInsn(uint32 byteP2Size, bool store, bool acqRel) const; + RegOperand *SelectLoadExcl(PrimType valPrimType, AArch64MemOperand &loc, bool acquire); + RegOperand *SelectStoreExcl(PrimType valPty, AArch64MemOperand &loc, RegOperand &newVal, bool release); + + MemOperand *GetPseudoRegisterSpillMemoryOperand(PregIdx i) override; + void ProcessLazyBinding() override; + bool CanLazyBinding(const Insn &insn); + void ConvertAdrpl12LdrToLdr(); + void ConvertAdrpLdrToIntrisic(); + bool IsStoreMop(MOperator mOp) const; + bool IsImmediateValueInRange(MOperator mOp, int64 immVal, bool is64Bits, + bool isIntactIndexed, bool isPostIndexed, bool isPreIndexed) const; + Insn &GenerateGlobalLongCallAfterInsn(const MIRSymbol &func, AArch64ListOperand &srcOpnds, + bool isCleanCall = false); + Insn &GenerateLocalLongCallAfterInsn(const MIRSymbol &func, AArch64ListOperand &srcOpnds, + bool isCleanCall = false); + bool IsDuplicateAsmList(const MIRSymbol &sym) const; + RegOperand *CheckStringIsCompressed(BB &bb, RegOperand &str, int32 countOffset, PrimType countPty, + LabelIdx jumpLabIdx); + RegOperand *CheckStringLengthLessThanEight(BB &bb, RegOperand &countOpnd, PrimType countPty, LabelIdx jumpLabIdx); + void GenerateIntrnInsnForStrIndexOf(BB &bb, RegOperand &srcString, RegOperand &patternString, + RegOperand &srcCountOpnd, RegOperand &patternLengthOpnd, + PrimType countPty, LabelIdx jumpLabIdx); + MemOperand *CheckAndCreateExtendMemOpnd(PrimType ptype, BaseNode &addrExpr, int32 offset, + AArch64isa::MemoryOrdering memOrd); +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_CGFUNC_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_color_ra.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_color_ra.h new file mode 100644 index 0000000000000000000000000000000000000000..7abb7ae62fd940a479143d79ed3e9cbda3604702 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_color_ra.h @@ -0,0 +1,1301 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_COLOR_RA_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_COLOR_RA_H +#include "riscv64_reg_alloc.h" +#include "riscv64_operand.h" +#include "riscv64_insn.h" +#include "riscv64_abi.h" +#include "loop.h" + +namespace maplebe { +#define RESERVED_REGS + +#define USE_LRA +#define USE_SPLIT +#undef USE_BB_FREQUENCY +#define OPTIMIZE_FOR_PROLOG +#undef REUSE_SPILLMEM +#undef COLOR_SPLIT +#undef MOVE_COALESCE + +/* for robust test */ +#undef CONSISTENT_MEMOPND +#undef RANDOM_PRIORITY + +constexpr uint32 k32 = sizeof(int) * CHAR_BIT; +constexpr uint32 k64 = sizeof(int64) * CHAR_BIT; +constexpr uint32 kU64 = sizeof(uint64) * CHAR_BIT; + +template > +inline bool FindNotIn(const std::set &set, const T &item) { + return set.find(item) == set.end(); +} + +template > +inline bool FindNotIn(const std::unordered_set &set, const T &item) { + return set.find(item) == set.end(); +} + +template +inline bool FindNotIn(const MapleSet &set, const T &item) { + return set.find(item) == set.end(); +} + +template +inline bool FindNotIn(const MapleUnorderedSet &set, const T &item) { + return set.find(item) == set.end(); +} + +template +inline bool FindNotIn(const MapleList &list, const T &item) { + return std::find(list.begin(), list.end(), item) == list.end(); +} + +template > +inline bool FindIn(const std::set &set, const T &item) { + return set.find(item) != set.end(); +} + +template > +inline bool FindIn(const std::unordered_set &set, const T &item) { + return set.find(item) != set.end(); +} + +template +inline bool FindIn(const MapleSet &set, const T &item) { + return set.find(item) != set.end(); +} + +template +inline bool FindIn(const MapleUnorderedSet &set, const T &item) { + return set.find(item) != set.end(); +} + +template +inline bool FindIn(const MapleList &list, const T &item) { + return std::find(list.begin(), list.end(), item) != list.end(); +} + +inline bool IsBitArrElemSet(const uint64 *vec, const uint32 num) { + size_t index = num / kU64; + uint64 bit = num % kU64; + return vec[index] & (1ULL << bit); +} + +inline bool IsBBsetOverlap(const uint64 *vec1, const uint64 *vec2, uint32 bbBuckets) { + for (uint32 i = 0; i < bbBuckets; ++i) { + if ((vec1[i] & vec2[i]) != 0) { + return true; + } + } + return false; +} + +/* + * This is per bb per LR. + * LU info is particular to a bb in a LR. 
+ */ +class LiveUnit { + public: + LiveUnit() = default; + ~LiveUnit() = default; + + void PrintLiveUnit() const; + + uint32 GetBegin() const { + return begin; + } + + void SetBegin(uint32 val) { + begin = val; + } + + uint32 GetEnd() const { + return end; + } + + void SetEnd(uint32 end) { + this->end = end; + } + + bool HasCall() const { + return hasCall; + } + + void SetHasCall(bool hasCall) { + this->hasCall = hasCall; + } + + uint32 GetDefNum() const { + return defNum; + } + + void SetDefNum(uint32 defNum) { + this->defNum = defNum; + } + + void IncDefNum() { + ++defNum; + } + + uint32 GetUseNum() const { + return useNum; + } + + void SetUseNum(uint32 useNum) { + this->useNum = useNum; + } + + void IncUseNum() { + ++useNum; + } + + bool NeedReload() const { + return needReload; + } + + void SetNeedReload(bool needReload) { + this->needReload = needReload; + } + + bool NeedRestore() const { + return needRestore; + } + + void SetNeedRestore(bool needRestore) { + this->needRestore = needRestore; + } + + private: + uint32 begin = 0; /* first encounter in bb */ + uint32 end = 0; /* last encounter in bb */ + bool hasCall = false; /* bb has a call */ + uint32 defNum = 0; + uint32 useNum = 0; /* used for priority calculation */ + bool needReload = false; + bool needRestore = false; +}; + +struct SortedBBCmpFunc { + bool operator()(const BB *lhs, const BB *rhs) const { + return (lhs->GetLevel() < rhs->GetLevel()); + } +}; + +/* LR is for each global vreg. */ +class LiveRange { + public: + explicit LiveRange(MapleAllocator &allocator) + : pregveto(allocator.Adapter()), + forbidden(allocator.Adapter()), + prefs(allocator.Adapter()), + luMap(allocator.Adapter()) {} + + ~LiveRange() = default; + + regno_t GetRegNO() const { + return regNO; + } + + void SetRegNO(regno_t val) { + regNO = val; + } + + uint32 GetID() const { + return id; + } + + void SetID(uint32 id) { + this->id = id; + } + + regno_t GetAssignedRegNO() const { + return assignedRegNO; + } + + void SetAssignedRegNO(regno_t val) { + assignedRegNO = val; + } + + uint32 GetNumCall() const { + return numCall; + } + + void SetNumCall(uint32 num) { + numCall = num; + } + + void IncNumCall() { + ++numCall; + } + + RegType GetRegType() const { + return regType; + } + + void SetRegType(RegType regType) { + this->regType = regType; + } + + float GetPriority() const { + return priority; + } + + void SetPriority(float priority) { + this->priority = priority; + } + + bool IsMustAssigned() const { + return mustAssigned; + } + + void SetMustAssigned() { + mustAssigned = true; + } + + void SetBBBuckets(uint32 bucketNum) { + bbBuckets = bucketNum; + } + + void SetRegBuckets(uint32 bucketNum) { + regBuckets = bucketNum; + } + + uint32 GetNumBBMembers() const { + return numBBMembers; + } + + void IncNumBBMembers() { + ++numBBMembers; + } + + void DecNumBBMembers() { + --numBBMembers; + } + + void InitBBMember(MemPool &memPool, size_t size) { + bbMember = memPool.NewArray(size); + errno_t ret = memset_s(bbMember, size * sizeof(uint64), 0, size * sizeof(uint64)); + CHECK_FATAL(ret == EOK, "call memset_s failed"); + } + + uint64 *GetBBMember() { + return bbMember; + } + + const uint64 *GetBBMember() const { + return bbMember; + } + + uint64 GetBBMemberElem(int32 index) const { + return bbMember[index]; + } + + void SetBBMemberElem(int32 index, uint64 elem) { + bbMember[index] = elem; + } + + void SetMemberBitArrElem(uint32 bbID) { + uint32 index = bbID / kU64; + uint64 bit = bbID % kU64; + uint64 mask = 1ULL << bit; + if ((GetBBMemberElem(index) & mask) == 
0) { + IncNumBBMembers(); + SetBBMemberElem(index, GetBBMemberElem(index) | mask); + } + } + + void UnsetMemberBitArrElem(uint32 bbID) { + uint32 index = bbID / kU64; + uint64 bit = bbID % kU64; + uint64 mask = 1ULL << bit; + if ((GetBBMemberElem(index) & mask) != 0) { + DecNumBBMembers(); + SetBBMemberElem(index, GetBBMemberElem(index) & (~mask)); + } + } + + void SetConflictBitArrElem(regno_t regNO) { + uint32 index = regNO / kU64; + uint64 bit = regNO % kU64; + uint64 mask = 1ULL << bit; + if ((GetBBConflictElem(index) & mask) == 0) { + IncNumBBConflicts(); + SetBBConflictElem(index, GetBBConflictElem(index) | mask); + } + } + + void UnsetConflictBitArrElem(regno_t regNO) { + uint32 index = regNO / kU64; + uint64 bit = regNO % kU64; + uint64 mask = 1ULL << bit; + if ((GetBBConflictElem(index) & mask) != 0) { + DecNumBBConflicts(); + SetBBConflictElem(index, GetBBConflictElem(index) & (~mask)); + } + } + + void InitPregveto() { + pregveto.clear(); + pregveto.resize(kMaxRegNum); + } + + bool GetPregveto(regno_t regNO) const { + return pregveto[regNO]; + } + + size_t GetPregvetoSize() const { + return numPregveto; + } + + void InsertElemToPregveto(regno_t regNO) { + if (!pregveto[regNO]) { + pregveto[regNO] = true; + ++numPregveto; + } + } + + void InitForbidden() { + forbidden.clear(); + forbidden.resize(kMaxRegNum); + } + + const MapleVector &GetForbidden() const { + return forbidden; + } + + bool GetForbidden(regno_t regNO) const { + return forbidden[regNO]; + } + + size_t GetForbiddenSize() const { + return numForbidden; + } + + void InsertElemToForbidden(regno_t regNO) { + if (!forbidden[regNO]) { + forbidden[regNO] = true; + ++numForbidden; + } + } + + void EraseElemFromForbidden(regno_t regNO) { + if (forbidden[regNO]) { + forbidden[regNO] = false; + --numForbidden; + } + } + + void ClearForbidden() { + forbidden.clear(); + } + + uint32 GetNumBBConflicts() const { + return numBBConflicts; + } + + void IncNumBBConflicts() { + ++numBBConflicts; + } + + void DecNumBBConflicts() { + --numBBConflicts; + } + + void InitBBConflict(MemPool &memPool, size_t size) { + bbConflict = memPool.NewArray(size); + errno_t ret = memset_s(bbConflict, size * sizeof(uint64), 0, size * sizeof(uint64)); + CHECK_FATAL(ret == EOK, "call memset_s failed"); + } + + const uint64 *GetBBConflict() const { + return bbConflict; + } + + uint64 GetBBConflictElem(int32 index) const { + ASSERT(index < regBuckets, "out of bbConflict"); + return bbConflict[index]; + } + + void SetBBConflictElem(int32 index, uint64 elem) { + ASSERT(index < regBuckets, "out of bbConflict"); + bbConflict[index] = elem; + } + + void SetOldConflict(uint64 *conflict) { + oldConflict = conflict; + } + + const uint64 *GetOldConflict() const { + return oldConflict; + } + + const MapleSet &GetPrefs() const { + return prefs; + } + + void InsertElemToPrefs(regno_t regNO) { + (void)prefs.insert(regNO); + } + + const MapleMap &GetLuMap() const { + return luMap; + } + + MapleMap::iterator FindInLuMap(uint32 index) { + return luMap.find(index); + } + + MapleMap::iterator EndOfLuMap() { + return luMap.end(); + } + + MapleMap::iterator EraseLuMap(MapleMap::iterator it) { + return luMap.erase(it); + } + + void SetElemToLuMap(uint32 key, LiveUnit &value) { + luMap[key] = &value; + } + + LiveUnit *GetLiveUnitFromLuMap(uint32 key) { + return luMap[key]; + } + + const LiveUnit *GetLiveUnitFromLuMap(uint32 key) const { + auto it = luMap.find(key); + ASSERT(it != luMap.end(), "can't find live unit"); + return it->second; + } + + const LiveRange *GetSplitLr() 
const { + return splitLr; + } + + void SetSplitLr(LiveRange &lr) { + splitLr = &lr; + } + +#ifdef OPTIMIZE_FOR_PROLOG + uint32 GetNumDefs() const { + return numDefs; + } + + void IncNumDefs() { + ++numDefs; + } + + uint32 GetNumUses() const { + return numUses; + } + + void IncNumUses() { + ++numUses; + } + + uint32 GetFrequency() const { + return frequency; + } + + void SetFrequency(uint32 frequency) { + this->frequency = frequency; + } +#endif /* OPTIMIZE_FOR_PROLOG */ + + MemOperand *GetSpillMem() { + return spillMem; + } + + const MemOperand *GetSpillMem() const { + return spillMem; + } + + void SetSpillMem(MemOperand& memOpnd) { + spillMem = &memOpnd; + } + + regno_t GetSpillReg() const { + return spillReg; + } + + void SetSpillReg(regno_t spillReg) { + this->spillReg = spillReg; + } + + uint32 GetSpillSize() const { + return spillSize; + } + + void SetSpillSize(uint32 size) { + spillSize = size; + } + + bool IsSpilled() const { + return spilled; + } + + void SetSpilled(bool spill) { + spilled = spill; + } + + bool IsNonLocal() const { + return isNonLocal; + } + + void SetIsNonLocal(bool isNonLocal) { + this->isNonLocal = isNonLocal; + } + + private: + regno_t regNO = 0; + uint32 id = 0; /* for priority tie breaker */ + regno_t assignedRegNO = 0; /* color assigned */ + uint32 numCall = 0; + RegType regType = kRegTyUndef; + float priority = 0.0; + bool mustAssigned = false; + uint32 bbBuckets = 0; /* size of bit array for bb (each bucket == 64 bits) */ + uint32 regBuckets = 0; /* size of bit array for reg (each bucket == 64 bits) */ + uint32 numBBMembers = 0; /* number of bits set in bbMember */ + uint64 *bbMember = nullptr; /* Same as smember, but use bit array */ + + MapleVector pregveto; /* pregs cannot be assigned -- SplitLr may clear forbidden */ + MapleVector forbidden; /* pregs cannot be assigned */ + uint32 numPregveto = 0; + uint32 numForbidden = 0; + + uint32 numBBConflicts = 0; /* number of bits set in bbConflict */ + uint64 *bbConflict = nullptr; /* vreg interference from graph neighbors (bit) */ + uint64 *oldConflict = nullptr; + MapleSet prefs; /* pregs that prefer */ + MapleMap luMap; /* info for each bb */ + LiveRange *splitLr = nullptr; /* The 1st part of the split */ +#ifdef OPTIMIZE_FOR_PROLOG + uint32 numDefs = 0; + uint32 numUses = 0; + uint32 frequency = 0; +#endif /* OPTIMIZE_FOR_PROLOG */ + MemOperand *spillMem = nullptr; /* memory operand used for spill, if any */ + regno_t spillReg = 0; /* register operand for spill at current point */ + uint32 spillSize = 0; /* 32 or 64 bit spill */ + bool spilled = false; /* color assigned */ + bool isNonLocal = false; +}; + +/* One per bb, to communicate local usage to global RA */ +class LocalRaInfo { + public: + explicit LocalRaInfo(MapleAllocator &allocator) + : defCnt(allocator.Adapter()), + useCnt(allocator.Adapter()) {} + + ~LocalRaInfo() = default; + + const MapleMap &GetDefCnt() const { + return defCnt; + } + + uint16 GetDefCntElem(regno_t regNO) { + return defCnt[regNO]; + } + + void SetDefCntElem(regno_t key, uint16 value) { + defCnt[key] = value; + } + + const MapleMap &GetUseCnt() const { + return useCnt; + } + + uint16 GetUseCntElem(regno_t regNO) { + return useCnt[regNO]; + } + + void SetUseCntElem(regno_t key, uint16 value) { + useCnt[key] = value; + } + + private: + MapleMap defCnt; + MapleMap useCnt; +}; + +/* For each bb, record info pertain to allocation */ +class BBAssignInfo { + public: + explicit BBAssignInfo(MapleAllocator &allocator) + : globalsAssigned(allocator.Adapter()), + 
regMap(allocator.Adapter()) {} + + ~BBAssignInfo() = default; + + uint32 GetIntLocalRegsNeeded() const { + return intLocalRegsNeeded; + } + + void SetIntLocalRegsNeeded(uint32 num) { + intLocalRegsNeeded = num; + } + + uint32 GetFpLocalRegsNeeded() const { + return fpLocalRegsNeeded; + } + + void SetFpLocalRegsNeeded(uint32 num) { + fpLocalRegsNeeded = num; + } + + void InitGlobalAssigned() { + globalsAssigned.clear(); + globalsAssigned.resize(kMaxRegNum); + } + + bool GetGlobalsAssigned(regno_t regNO) const { + return globalsAssigned[regNO]; + } + + void InsertElemToGlobalsAssigned(regno_t regNO) { + globalsAssigned[regNO] = true; + } + + void EraseElemToGlobalsAssigned(regno_t regNO) { + globalsAssigned[regNO] = false; + } + + const MapleMap &GetRegMap() const { + return regMap; + } + + bool HasRegMap(regno_t regNOKey) const { + return (regMap.find(regNOKey) != regMap.end()); + } + + regno_t GetRegMapElem(regno_t regNO) { + return regMap[regNO]; + } + + void SetRegMapElem(regno_t regNOKey, regno_t regNOValue) { + regMap[regNOKey] = regNOValue; + } + + private: + uint32 intLocalRegsNeeded = 0; /* num local reg needs for each bb */ + uint32 fpLocalRegsNeeded = 0; /* num local reg needs for each bb */ + MapleVector globalsAssigned; /* globals used in a bb */ + MapleMap regMap; /* local vreg to preg mapping */ +}; + +class FinalizeRegisterInfo { + public: + explicit FinalizeRegisterInfo(MapleAllocator &allocator) + : defOperands(allocator.Adapter()), + defIdx(allocator.Adapter()), + useOperands(allocator.Adapter()), + useIdx(allocator.Adapter()) {} + + ~FinalizeRegisterInfo() = default; + void ClearInfo() { + memOperandIdx = 0; + baseOperand = nullptr; + offsetOperand = nullptr; + defOperands.clear(); + defIdx.clear(); + useOperands.clear(); + useIdx.clear(); + } + + void SetBaseOperand(Operand &opnd, const int32 idx) { + baseOperand = &opnd; + memOperandIdx = idx; + } + + void SetOffsetOperand(Operand &opnd) { + offsetOperand = &opnd; + } + + void SetDefOperand(Operand &opnd, const int32 idx) { + defOperands.emplace_back(&opnd); + defIdx.emplace_back(idx); + } + + void SetUseOperand(Operand &opnd, const int32 idx) { + useOperands.emplace_back(&opnd); + useIdx.emplace_back(idx); + } + + int32 GetMemOperandIdx() const { + return memOperandIdx; + } + + const Operand *GetBaseOperand() const { + return baseOperand; + } + + const Operand *GetOffsetOperand() const { + return offsetOperand; + } + + size_t GetDefOperandsSize() const { + return defOperands.size(); + } + + const Operand *GetDefOperandsElem(size_t index) const { + return defOperands[index]; + } + + int32 GetDefIdxElem(size_t index) const { + return defIdx[index]; + } + + size_t GetUseOperandsSize() const { + return useOperands.size(); + } + + const Operand *GetUseOperandsElem(size_t index) const { + return useOperands[index]; + } + + int32 GetUseIdxElem(size_t index) const { + return useIdx[index]; + } + + private: + int32 memOperandIdx = 0; + Operand *baseOperand = nullptr; + Operand *offsetOperand = nullptr; + MapleVector defOperands; + MapleVector defIdx; + MapleVector useOperands; + MapleVector useIdx; +}; + +class LocalRegAllocator { + public: + LocalRegAllocator(CGFunc &cgFunc, MapleAllocator &allocator) + : intRegAssignmentMap(allocator.Adapter()), + fpRegAssignmentMap(allocator.Adapter()), + useInfo(allocator.Adapter()), + defInfo(allocator.Adapter()) { + buckets = (cgFunc.GetMaxRegNum() / kU64) + 1; + intRegAssigned = cgFunc.GetMemoryPool()->NewArray(buckets); + fpRegAssigned = cgFunc.GetMemoryPool()->NewArray(buckets); + 
intRegSpilled = cgFunc.GetMemoryPool()->NewArray(buckets); + fpRegSpilled = cgFunc.GetMemoryPool()->NewArray(buckets); + } + + ~LocalRegAllocator() = default; + + void ClearLocalRaInfo() { + ClearBitArrElement(intRegAssigned); + ClearBitArrElement(fpRegAssigned); + intRegAssignmentMap.clear(); + fpRegAssignmentMap.clear(); + intPregUsed = 0; + fpPregUsed = 0; + ClearBitArrElement(intRegSpilled); + ClearBitArrElement(fpRegSpilled); + numIntPregUsed = 0; + numFpPregUsed = 0; + } + + regno_t RegBaseUpdate(bool isInt) const { + return isInt ? 0 : V0 - R0; + } + + bool IsInRegAssigned(regno_t regNO, bool isInt) const { + uint64 *regAssigned = nullptr; + if (isInt) { + regAssigned = intRegAssigned; + } else { + regAssigned = fpRegAssigned; + } + return IsBitArrElemSet(regAssigned, regNO);; + } + + void SetRegAssigned(regno_t regNO, bool isInt) { + if (isInt) { + SetBitArrElement(intRegAssigned, regNO); + } else { + SetBitArrElement(fpRegAssigned, regNO); + } + } + + regno_t GetRegAssignmentItem(bool isInt, regno_t regKey) { + return isInt ? intRegAssignmentMap[regKey] : fpRegAssignmentMap[regKey]; + } + + void SetRegAssignmentMap(bool isInt, regno_t regKey, regno_t regValue) { + if (isInt) { + intRegAssignmentMap[regKey] = regValue; + } else { + fpRegAssignmentMap[regKey] = regValue; + } + } + + /* only for HandleLocalRaDebug */ + uint64 GetPregUsed(bool isInt) const { + if (isInt) { + return intPregUsed; + } else { + return fpPregUsed; + } + } + + void SetPregUsed(regno_t regNO, bool isInt) { + uint64 mask = 0; + if (isInt) { + mask = 1ULL << (regNO - R0); + if ((intPregUsed & mask) == 0) { + ++numIntPregUsed; + intPregUsed |= mask; + } + } else { + mask = 1ULL << (regNO - V0); + if ((fpPregUsed & mask) == 0) { + ++numFpPregUsed; + fpPregUsed |= mask; + } + } + } + + bool isInRegSpilled(regno_t regNO, bool isInt) const { + bool isSet; + if (isInt) { + isSet = IsBitArrElemSet(intRegSpilled, regNO); + } else { + isSet = IsBitArrElemSet(fpRegSpilled, regNO); + } + return isSet; + } + + void SetRegSpilled(regno_t regNO, bool isInt) { + if (isInt) { + SetBitArrElement(intRegSpilled, regNO); + } else { + SetBitArrElement(fpRegSpilled, regNO); + } + } + + uint64 GetPregs(bool isInt) const { + if (isInt) { + return intPregs; + } else { + return fpPregs; + } + } + + void SetPregs(regno_t regNO, bool isInt) { + if (isInt) { + intPregs |= 1ULL << (regNO - RegBaseUpdate(true)); + } else { + fpPregs |= 1ULL << (regNO - RegBaseUpdate(false)); + } + } + + void ClearPregs(regno_t regNO, bool isInt) { + if (isInt) { + intPregs &= ~(1ULL << (regNO - RegBaseUpdate(true))); + } else { + fpPregs &= ~(1ULL << (regNO - RegBaseUpdate(false))); + } + } + + bool IsPregAvailable(regno_t regNO, bool isInt) const { + bool isAvailable; + if (isInt) { + isAvailable = intPregs & (1ULL << (regNO - RegBaseUpdate(true))); + } else { + isAvailable = fpPregs & (1ULL << (regNO - RegBaseUpdate(false))); + } + return isAvailable; + } + + void InitPregs(uint32 intMax, uint32 fpMax, bool hasYield, const MapleSet &intSpillRegSet, + const MapleSet &fpSpillRegSet) { + uint32 intBase = R0; + uint32 fpBase = V0; + intPregs = (1ULL << (intMax + 1)) - 1; + fpPregs = (1ULL << (((fpMax + 1) + fpBase) - RegBaseUpdate(false))) - 1; + for (uint32 regNO : intSpillRegSet) { + ClearPregs(regNO + intBase, true); + } + for (uint32 regNO : fpSpillRegSet) { + ClearPregs(regNO + fpBase, false); + } + if (hasYield) { + ClearPregs(RYP, true); + } +#ifdef RESERVED_REGS + intPregs &= ~(1ULL << R16); + intPregs &= ~(1ULL << R17); +#endif /* RESERVED_REGS */ + 
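+    /*
+     * Gloss on the integer mask built above (inferred from this header alone):
+     * (1ULL << (intMax + 1)) - 1 turns on bits 0..intMax, so every integer
+     * preg numbered <= intMax starts out allocatable; the two loops and the
+     * hasYield case then clear single bits via ClearPregs(), and RESERVED_REGS
+     * masks out R16/R17 directly.
+     */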
} + + const MapleMap &GetIntRegAssignmentMap() const { + return intRegAssignmentMap; + } + + const MapleMap &GetFpRegAssignmentMap() const { + return fpRegAssignmentMap; + } + + const MapleMap &GetUseInfo() const { + return useInfo; + } + + void SetUseInfoElem(regno_t regNO, uint16 info) { + useInfo[regNO] = info; + } + + void IncUseInfoElem(regno_t regNO) { + if (useInfo.find(regNO) != useInfo.end()) { + ++useInfo[regNO]; + } + } + + uint16 GetUseInfoElem(regno_t regNO) { + return useInfo[regNO]; + } + + void ClearUseInfo() { + useInfo.clear(); + } + + const MapleMap &GetDefInfo() const { + return defInfo; + } + + void SetDefInfoElem(regno_t regNO, uint16 info) { + defInfo[regNO] = info; + } + + uint16 GetDefInfoElem(regno_t regNO) { + return defInfo[regNO]; + } + + void IncDefInfoElem(regno_t regNO) { + if (defInfo.find(regNO) != defInfo.end()) { + ++defInfo[regNO]; + } + } + + void ClearDefInfo() { + defInfo.clear(); + } + + uint32 GetNumIntPregUsed() const { + return numIntPregUsed; + } + + uint32 GetNumFpPregUsed() const { + return numFpPregUsed; + } + + private: + void ClearBitArrElement(uint64 *vec) { + for (uint32 i = 0; i < buckets; ++i) { + vec[i] = 0UL; + } + } + + void SetBitArrElement(uint64 *vec, regno_t regNO) { + uint32 index = regNO / kU64; + uint64 bit = regNO % kU64; + vec[index] |= 1ULL << bit; + } + + /* The following local vars keeps track of allocation information in bb. */ + uint64 *intRegAssigned; /* in this set if vreg is assigned */ + uint64 *fpRegAssigned; + MapleMap intRegAssignmentMap; /* vreg -> preg map, which preg is the vreg assigned */ + MapleMap fpRegAssignmentMap; + uint64 intPregUsed = 0; /* pregs used in bb */ + uint64 fpPregUsed = 0; + uint64 *intRegSpilled; /* on this list if vreg is spilled */ + uint64 *fpRegSpilled; + + uint64 intPregs = 0; /* available regs for assignement */ + uint64 fpPregs = 0; + MapleMap useInfo; /* copy of local ra info for useCnt */ + MapleMap defInfo; /* copy of local ra info for defCnt */ + + uint32 numIntPregUsed = 0; + uint32 numFpPregUsed = 0; + uint32 buckets; +}; + +class SplitBBInfo { + public: + SplitBBInfo() = default; + + ~SplitBBInfo() = default; + + BB *GetCandidateBB() { + return candidateBB; + } + + const BB *GetCandidateBB() const { + return candidateBB; + } + + const BB *GetStartBB() const { + return startBB; + } + + void SetCandidateBB(BB &bb) { + candidateBB = &bb; + } + + void SetStartBB(BB &bb) { + startBB = &bb; + } + + private: + BB *candidateBB = nullptr; + BB *startBB = nullptr; +}; + +class GraphColorRegAllocator : public AArch64RegAllocator { + public: + GraphColorRegAllocator(CGFunc &cgFunc, MemPool &memPool) + : AArch64RegAllocator(cgFunc, memPool), + bbVec(alloc.Adapter()), + vregLive(alloc.Adapter()), + pregLive(alloc.Adapter()), + lrVec(alloc.Adapter()), + localRegVec(alloc.Adapter()), + bbRegInfo(alloc.Adapter()), + unconstrained(alloc.Adapter()), + constrained(alloc.Adapter()), + mustAssigned(alloc.Adapter()), +#ifdef OPTIMIZE_FOR_PROLOG + intDelayed(alloc.Adapter()), + fpDelayed(alloc.Adapter()), +#endif /* OPTIMIZE_FOR_PROLOG */ + intCallerRegSet(alloc.Adapter()), + intCalleeRegSet(alloc.Adapter()), + intSpillRegSet(alloc.Adapter()), + fpCallerRegSet(alloc.Adapter()), + fpCalleeRegSet(alloc.Adapter()), + fpSpillRegSet(alloc.Adapter()), + intCalleeUsed(alloc.Adapter()), + fpCalleeUsed(alloc.Adapter()) { + numVregs = cgFunc.GetMaxVReg(); + lrVec.resize(numVregs); + localRegVec.resize(cgFunc.NumBBs()); + bbRegInfo.resize(cgFunc.NumBBs()); + } + + ~GraphColorRegAllocator() override = default; 
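+
+  /*
+   * A rough sketch of how the AllocateRegisters() entry point below is expected
+   * to drive the private phases declared further down (inferred from those
+   * declarations only; the actual ordering and early-exit conditions live in
+   * the .cpp implementation):
+   *
+   *   ComputeLiveRanges();           // build a LiveRange (with per-BB LiveUnits) for each vreg
+   *   BuildInterferenceGraph();      // fill the bbConflict bit arrays of interfering LRs
+   *   Separate();                    // partition LRs into unconstrained / constrained / mustAssigned
+   *   SplitAndColor();               // assign colors by priority, splitting LRs that cannot be colored
+   *   LocalRegisterAllocator(true);  // per-BB assignment for vregs that never leave one block
+   *   FinalizeRegisters();           // rewrite operands and emit spill/reload code
+   */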
+ + bool AllocateRegisters() override; + std::string PhaseName() const { + return "regalloc"; + } + + private: + struct SetLiveRangeCmpFunc { + bool operator()(const LiveRange *lhs, const LiveRange *rhs) const { + if (fabs(lhs->GetPriority() - rhs->GetPriority()) <= 1e-6) { + /* + * This is to ensure the ordering is consistent as the reg# + * differs going through VtableImpl.mpl file. + */ + if (lhs->GetID() == rhs->GetID()) { + return lhs->GetRegNO() < rhs->GetRegNO(); + } else { + return lhs->GetID() < rhs->GetID(); + } + } + return (lhs->GetPriority() > rhs->GetPriority()); + } + }; + + template + void ForEachBBArrElem(const uint64 *vec, Func functor) const; + + template + void ForEachBBArrElemWithInterrupt(const uint64 *vec, Func functor) const; + + template + void ForEachRegArrElem(const uint64 *vec, Func functor) const; + + void PrintLiveUnitMap(const LiveRange &lr) const; + void PrintLiveRangeConflicts(const LiveRange &lr) const; + void PrintLiveBBBit(const LiveRange &li) const; + void PrintLiveRange(const LiveRange &li, const std::string &str) const; + void PrintLiveRanges() const; + void PrintLocalRAInfo(const std::string &str) const; + void PrintBBAssignInfo() const; + void PrintBBs() const; + + uint32 MaxIntPhysRegNum() const; + uint32 MaxFloatPhysRegNum() const; + bool IsReservedReg(AArch64reg regNO) const; + void InitFreeRegPool(); + void InitCCReg(); + bool IsUnconcernedReg(regno_t regNO) const; + bool IsUnconcernedReg(const RegOperand ®Opnd) const; + LiveRange *NewLiveRange(); + void CalculatePriority(LiveRange &lr) const; + bool CreateLiveRangeHandleLocal(regno_t regNO, const BB &bb, bool isDef); + LiveRange *CreateLiveRangeAllocateAndUpdate(regno_t regNO, const BB &bb, bool isDef, uint32 currId); + bool CreateLiveRange(regno_t regNO, BB &bb, bool isDef, uint32 currPoint, bool updateCount); + bool SetupLiveRangeByOpHandlePhysicalReg(RegOperand &op, Insn &insn, regno_t regNO, bool isDef); + void SetupLiveRangeByOp(Operand &op, Insn &insn, bool isDef, uint32 &numUses); + void SetupLiveRangeByRegNO(regno_t liveOut, BB &bb, uint32 currPoint); + bool UpdateInsnCntAndSkipUseless(Insn &insn, uint32 &currPoint); + void UpdateCallInfo(uint32 bbId); + void ClassifyOperand(std::unordered_set &pregs, std::unordered_set &vregs, const Operand &opnd); + void SetOpndConflict(const Insn &insn, bool onlyDef); + void UpdateOpndConflict(const Insn &insn, bool multiDef); + void SetupMustAssignedLiveRanges(const Insn &insn); + void ComputeLiveRangesForEachDefOperand(Insn &insn, bool &multiDef); + void ComputeLiveRangesForEachUseOperand(Insn &insn); + void ComputeLiveRangesUpdateIfInsnIsCall(const Insn &insn); + void ComputeLiveRangesUpdateLiveUnitInsnRange(BB &bb, uint32 currPoint); + void ComputeLiveRanges(); + MemOperand *CreateSpillMem(uint32 spillIdx); + bool CheckOverlap(uint64 val, uint32 &lastBitSet, uint32 &overlapNum, uint32 i) const; + void CheckInterference(LiveRange &lr1, LiveRange &lr2) const; + void BuildInterferenceGraphSeparateIntFp(std::vector &intLrVec, std::vector &fpLrVec); + void BuildInterferenceGraph(); + void SetBBInfoGlobalAssigned(uint32 bbID, regno_t regNO); + bool HaveAvailableColor(const LiveRange &lr, uint32 num) const; + void Separate(); + void SplitAndColorForEachLr(MapleVector &targetLrVec, bool isConstrained); + void SplitAndColor(); + void ColorForOptPrologEpilog(); + bool IsLocalReg(regno_t regNO) const; + bool IsLocalReg(LiveRange &lr) const; + void HandleLocalRaDebug(regno_t regNO, const LocalRegAllocator &localRa, bool isInt) const; + void 
HandleLocalRegAssignment(regno_t regNO, LocalRegAllocator &localRa, bool isInt); + void UpdateLocalRegDefUseCount(regno_t regNO, LocalRegAllocator &localRa, bool isDef, bool isInt) const; + void UpdateLocalRegConflict(regno_t regNO, LocalRegAllocator &localRa, bool isInt); + void HandleLocalReg(Operand &op, LocalRegAllocator &localRa, const BBAssignInfo *bbInfo, bool isDef, bool isInt); + void LocalRaRegSetEraseReg(LocalRegAllocator &localRa, regno_t regNO); + bool LocalRaInitRegSet(LocalRegAllocator &localRa, uint32 bbId); + void LocalRaInitAllocatableRegs(LocalRegAllocator &localRa, uint32 bbId); + void LocalRaForEachDefOperand(const Insn &insn, LocalRegAllocator &localRa, const BBAssignInfo *bbInfo); + void LocalRaForEachUseOperand(const Insn &insn, LocalRegAllocator &localRa, const BBAssignInfo *bbInfo); + void LocalRaPrepareBB(BB &bb, LocalRegAllocator &localRa); + void LocalRaFinalAssignment(LocalRegAllocator &localRa, BBAssignInfo &bbInfo); + void LocalRaDebug(BB &bb, LocalRegAllocator &localRa); + void LocalRegisterAllocator(bool allocate); + MemOperand *GetSpillOrReuseMem(LiveRange &lr, uint32 regSize, bool &isOutOfRange, Insn &insn, bool isDef); + void SpillOperandForSpillPre(Insn &insn, const Operand &opnd, RegOperand &phyOpnd, uint32 spillIdx, bool needSpill); + void SpillOperandForSpillPost(Insn &insn, const Operand &opnd, RegOperand &phyOpnd, uint32 spillIdx, bool needSpill); + Insn *SpillOperand(Insn &insn, const Operand &opnd, bool isDef, RegOperand &phyOpnd); + MemOperand *GetConsistentReuseMem(const uint64 *conflict, const std::set &usedMemOpnd, uint32 size, + RegType regType); + MemOperand *GetCommonReuseMem(const uint64 *conflict, const std::set &usedMemOpnd, uint32 size, + RegType regType); + MemOperand *GetReuseMem(uint32 vregNO, uint32 size, RegType regType); + MemOperand *GetSpillMem(uint32 vregNO, bool isDest, Insn &insn, AArch64reg regNO, bool &isOutOfRange); + bool SetAvailableSpillReg(std::unordered_set &cannotUseReg, LiveRange &lr, uint64 &usedRegMask); + void CollectCannotUseReg(std::unordered_set &cannotUseReg, const LiveRange &lr, Insn &insn); + regno_t PickRegForSpill(uint64 &usedRegMask, RegType regType, uint32 spillIdx, bool &needSpillLr); + bool SetRegForSpill(LiveRange &lr, Insn &insn, uint32 spillIdx, uint64 &usedRegMask, bool isDef); + bool GetSpillReg(Insn &insn, LiveRange &lr, uint32 &spillIdx, uint64 &usedRegMask, bool isDef); + RegOperand *GetReplaceOpndForLRA(Insn &insn, const Operand &opnd, uint32 &spillIdx, uint64 &usedRegMask, bool isDef); + RegOperand *GetReplaceOpnd(Insn &insn, const Operand &opnd, uint32 &spillIdx, uint64 &usedRegMask, bool isDef); + void MarkCalleeSaveRegs(); + void MarkUsedRegs(Operand &opnd, BBAssignInfo *bbInfo, uint64 &usedRegMask); + uint64 FinalizeRegisterPreprocess(BBAssignInfo *bbInfo, FinalizeRegisterInfo &fInfo, Insn &insn); + void FinalizeRegisters(); + + MapleVector::iterator GetHighPriorityLr(MapleVector &lrSet) const; + void UpdateForbiddenForNeighbors(LiveRange &lr) const; + void UpdatePregvetoForNeighbors(LiveRange &lr) const; + regno_t FindColorForLr(const LiveRange &lr) const; + bool ShouldUseCallee(LiveRange &lr, const MapleSet &calleeUsed, + const MapleVector &delayed) const; + void AddCalleeUsed(regno_t regNO, RegType regType); + bool AssignColorToLr(LiveRange &lr, bool isDelayed = false); + void PruneLrForSplit(LiveRange &lr, BB &bb, bool remove, std::set &candidateInLoop, + std::set &defInLoop); + bool UseIsUncovered(BB &bb, const BB &startBB); + void FindUseForSplit(LiveRange &lr, SplitBBInfo 
&bbInfo, bool &remove, + std::set &candidateInLoop, + std::set &defInLoop); + void FindBBSharedInSplit(LiveRange &lr, std::set &candidateInLoop, + std::set &defInLoop); + void ComputeBBForNewSplit(LiveRange &newLr, LiveRange &oldLr); + void ClearLrBBFlags(const std::set &member); + void ComputeBBForOldSplit(LiveRange &newLr, LiveRange &oldLr); + bool LrCanBeColored(const LiveRange &lr, const BB &bbAdded, std::unordered_set &conflictRegs); + void MoveLrBBInfo(LiveRange &oldLr, LiveRange &newLr, BB &bb); + bool ContainsLoop(const CGFuncLoops &loop, const std::set &loops) const; + void GetAllLrMemberLoops(LiveRange &lr, std::set &loop); + bool SplitLrShouldSplit(LiveRange &lr); + bool SplitLrFindCandidateLr(LiveRange &lr, LiveRange &newLr, std::unordered_set &conflictRegs); + void SplitLrHandleLoops(LiveRange &lr, LiveRange &newLr, const std::set &oldLoops, + const std::set &newLoops); + void SplitLrFixNewLrCallsAndRlod(LiveRange &newLr, const std::set &origLoops); + void SplitLrFixOrigLrCalls(LiveRange &lr); + void SplitLrUpdateInterference(LiveRange &lr); + void SplitLrUpdateRegInfo(LiveRange &origLr, LiveRange &newLr, std::unordered_set &conflictRegs); + void SplitLrErrorCheckAndDebug(LiveRange &origLr); + void SplitLr(LiveRange &lr); + + static constexpr uint16 kMaxUint16 = 0x7fff; + + MapleVector bbVec; + MapleUnorderedSet vregLive; + MapleUnorderedSet pregLive; + MapleVector lrVec; + MapleVector localRegVec; /* local reg info for each bb, no local reg if null */ + MapleVector bbRegInfo; /* register assignment info for each bb */ + MapleVector unconstrained; + MapleVector constrained; + MapleVector mustAssigned; +#ifdef OPTIMIZE_FOR_PROLOG + MapleVector intDelayed; + MapleVector fpDelayed; +#endif /* OPTIMIZE_FOR_PROLOG */ + MapleSet intCallerRegSet; /* integer caller saved */ + MapleSet intCalleeRegSet; /* callee */ + MapleSet intSpillRegSet; /* spill */ + MapleSet fpCallerRegSet; /* float caller saved */ + MapleSet fpCalleeRegSet; /* callee */ + MapleSet fpSpillRegSet; /* spill */ + MapleSet intCalleeUsed; + MapleSet fpCalleeUsed; + + uint32 bbBuckets = 0; /* size of bit array for bb (each bucket == 64 bits) */ + uint32 regBuckets = 0; /* size of bit array for reg (each bucket == 64 bits) */ + uint32 intRegNum = 0; /* total available int preg */ + uint32 fpRegNum = 0; /* total available fp preg */ + uint32 numVregs = 0; /* number of vregs when starting */ + regno_t ccReg = 0; + /* For spilling of spill register if there are none available + * Example, all 3 operands spilled + * sp_reg1 -> [spillMemOpnds[1]] + * sp_reg2 -> [spillMemOpnds[2]] + * ld sp_reg1 <- [addr-reg2] + * ld sp_reg2 <- [addr-reg3] + * reg1 <- reg2, reg3 sp_reg1 <- sp_reg1, sp_reg2 + * st sp_reg1 -> [addr-reg1] + * sp_reg1 <- [spillMemOpnds[1]] + * sp_reg2 <- [spillMemOpnds[2]] + */ + static constexpr size_t kSpillMemOpndNum = 4; + std::array spillMemOpnds = { nullptr }; + bool needExtraSpillReg = false; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_COLOR_RA_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_dependence.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_dependence.h new file mode 100644 index 0000000000000000000000000000000000000000..fda9ff551f5a08df114daecc9506d27c267a0793 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_dependence.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_DEPENDENCE_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_DEPENDENCE_H + +#include "dependence.h" +#include "cgfunc.h" +#include "riscv64_operand.h" + +namespace maplebe { +class AArch64DepAnalysis : public DepAnalysis { + public: + AArch64DepAnalysis(CGFunc &func, MemPool &mp, MAD &mad, bool beforeRA); + + ~AArch64DepAnalysis() override = default; + + void Run(BB &bb, MapleVector &nodes) override; + const std::string &GetDepTypeName(DepType depType) const override; + void DumpDepNode(DepNode &node) const override; + void DumpDepLink(DepLink &link, const DepNode *node) const override; + + protected: + void Init(BB &bb, MapleVector &nodes) override; + void ClearAllDepData() override; + void AnalysisAmbiInsns(BB &bb) override; + void AppendRegUseList(Insn &insn, regno_t regNO) override; + void AddDependence(DepNode& fromNode, DepNode &toNode, DepType depType) override; + void RemoveSelfDeps(Insn &insn) override; + void CombineClinit(DepNode &firstNode, DepNode &secondNode, bool isAcrossSeparator) override; + void CombineDependence(DepNode &firstNode, DepNode &secondNode, bool isAcrossSeparator, + bool isMemCombine = false) override; + void CombineMemoryAccessPair(DepNode &firstNode, DepNode &secondNode, bool useFirstOffset) override; + void BuildDepsUseReg(Insn &insn, regno_t regNO) override; + void BuildDepsDefReg(Insn &insn, regno_t regNO) override; + void BuildDepsAmbiInsn(Insn &insn) override; + void BuildDepsMayThrowInsn(Insn &insn) override; + bool NeedBuildDepsMem(const AArch64MemOperand &memOpnd, const AArch64MemOperand *nextMemOpnd, Insn &memInsn) const; + void BuildDepsUseMem(Insn &insn, MemOperand &memOpnd) override; + void BuildDepsDefMem(Insn &insn, MemOperand &memOpnd) override; + void BuildAntiDepsDefStackMem(Insn &insn, const AArch64MemOperand &memOpnd, const AArch64MemOperand *nextMemOpnd); + void BuildOutputDepsDefStackMem(Insn &insn, const AArch64MemOperand &memOpnd, const AArch64MemOperand *nextMemOpnd); + void BuildDepsMemBar(Insn &insn) override; + void BuildDepsSeparator(DepNode &newSepNode, MapleVector &nodes) override; + void BuildDepsControlAll(DepNode &depNode, const MapleVector &nodes) override; + void BuildDepsAccessStImmMem(Insn &insn, bool isDest) override; + void BuildCallerSavedDeps(Insn &insn) override; + void BuildDepsBetweenControlRegAndCall(Insn &insn, bool isDest) override; + void BuildStackPassArgsDeps(Insn &insn) override; + void BuildDepsDirtyStack(Insn &insn) override; + void BuildDepsUseStack(Insn &insn) override; + void BuildDepsDirtyHeap(Insn &insn) override; + DepNode *BuildSeparatorNode() override; + bool IfInAmbiRegs(regno_t regNO) const override; + bool IsFrameReg(const RegOperand&) const override; + + private: + AArch64MemOperand *GetNextMemOperand(Insn &insn, AArch64MemOperand &aarchMemOpnd) const; + void BuildMemOpndDependency(Insn &insn, Operand &opnd, const AArch64OpndProp ®Prop); + void BuildOpndDependency(Insn &insn); + void BuildSpecialInsnDependency(Insn &insn, DepNode &depNode, const MapleVector &nodes); + void SeperateDependenceGraph(MapleVector 
&nodes, uint32 &nodeSum); + DepNode *GenerateDepNode(Insn &insn, MapleVector &nodes, int32 nodeSum, const MapleVector &comments); + void BuildAmbiInsnDependency(Insn &insn); + void BuildMayThrowInsnDependency(Insn &insn); + void UpdateRegUseAndDef(Insn &insn, DepNode &depNode, MapleVector &nodes); + void UpdateStackAndHeapDependency(DepNode &depNode, Insn &insn, const Insn &locInsn); + AArch64MemOperand *BuildNextMemOperandByByteSize(AArch64MemOperand &aarchMemOpnd, uint32 byteSize) const; + void AddDependence4InsnInVectorByType(MapleVector &insns, Insn &insn, const DepType &type); + void AddDependence4InsnInVectorByTypeAndCmp(MapleVector &insns, Insn &insn, const DepType &type); + void ReplaceDepNodeWithNewInsn(DepNode &firstNode, DepNode &secondNode, Insn& newInsn, bool isFromClinit) const; + void ClearDepNodeInfo(DepNode &depNode) const; + void AddEndSeparatorNode(MapleVector &nodes); + + Insn **regDefs = nullptr; + RegList **regUses = nullptr; + Insn *memBarInsn = nullptr; + bool hasAmbiRegs = false; + Insn *lastCallInsn = nullptr; + uint32 separatorIndex = 0; + Insn *lastFrameDef = nullptr; + MapleVector stackUses; + MapleVector stackDefs; + MapleVector heapUses; + MapleVector heapDefs; + MapleVector mayThrows; + /* instructions that can not across may throw instructions. */ + MapleVector ambiInsns; + /* register number that catch bb and cleanup bb uses. */ + MapleSet ehInRegs; + /* the bb to be scheduling currently */ + BB *curBB = nullptr; +}; +} + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_DEPENDENCE_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_ebo.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_ebo.h new file mode 100644 index 0000000000000000000000000000000000000000..f0b8167e3c4a268d2ffe9365ae768dbd912beed0 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_ebo.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_EBO_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_EBO_H + +#include "ebo.h" +#include "riscv64_operand.h" +#include "riscv64_cgfunc.h" + +namespace maplebe { +using namespace maple; + +class AArch64Ebo : public Ebo { + public: + AArch64Ebo(CGFunc &func, MemPool &memPool, LiveAnalysis *live, bool before, const std::string &phase) + : Ebo(func, memPool, live, before, phase), + callerSaveRegTable(eboAllocator.Adapter()) { + a64CGFunc = static_cast(cgFunc); + } + + ~AArch64Ebo() override = default; + + protected: + MapleVector callerSaveRegTable; + AArch64CGFunc *a64CGFunc; + int32 GetOffsetVal(const MemOperand &mem) const override; + OpndInfo *OperandInfoDef(BB ¤tBB, Insn ¤tInsn, Operand &localOpnd) override; + const RegOperand &GetRegOperand(const Operand &opnd) const override; + bool IsGlobalNeeded(Insn &insn) const override; + bool OperandEqSpecial(const Operand &op1, const Operand &op2) const override; + bool DoConstProp(Insn &insn, uint32 i, Operand &opnd) override; + bool Csel2Cset(Insn &insn, const MapleVector &opnds) override; + bool SimplifyConstOperand(Insn &insn, const MapleVector &opnds, + const MapleVector &opndInfo) override; + void BuildCallerSaveRegisters() override; + void DefineCallerSaveRegisters(InsnInfo &insnInfo) override; + void DefineReturnUseRegister(Insn &insn) override; + void DefineCallUseSpecialRegister(Insn &insn) override; + void DefineClinitSpecialRegisters(InsnInfo &insnInfo) override; + bool SpecialSequence(Insn &insn, const MapleVector &origInfos) override; + bool IsMovToSIMDVmov(Insn &insn, const Insn &replaceInsn) const override; + bool ChangeLdrMop(Insn &insn, const Operand &opnd) const override; + bool IsAdd(const Insn &insn) const override; + bool IsFmov(const Insn &insn) const override; + bool IsClinitCheck(const Insn &insn) const override; + bool IsLastAndBranch(BB &bb, Insn &insn) const override; + bool ResIsNotDefAndUse(Insn &insn) const override; + bool LiveOutOfBB(const Operand &opnd, const BB &bb) const override; + + private: + /* The number of elements in callerSaveRegTable must less then 45. */ + static constexpr int32 kMaxCallerSaveReg = 45; + bool IsZeroRegister(const Operand &opnd) const; + bool CheckCondCode(const CondOperand &cond) const; + bool SimplifyBothConst(BB &bb, Insn &insn, const AArch64ImmOperand &immOperand0, const AArch64ImmOperand &immOperand1, + uint32 opndSize); + AArch64CC_t GetReverseCond(const CondOperand &cond) const; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_EBO_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_emitter.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_emitter.h new file mode 100644 index 0000000000000000000000000000000000000000..d4b35348f582760e2042b2437b9ced02cda31141 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_emitter.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_EMITTER_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_EMITTER_H + +#include "asm_emit.h" + +namespace maplebe { +using namespace maple; + +class AArch64AsmEmitter : public AsmEmitter { + public: + AArch64AsmEmitter(CG &cg, const std::string &asmFileName) : AsmEmitter(cg, asmFileName) {} + ~AArch64AsmEmitter() = default; + + void EmitRefToMethodDesc(FuncEmitInfo &funcEmitInfo, Emitter &emitter) override; + void EmitRefToMethodInfo(FuncEmitInfo &funcEmitInfo, Emitter &emitter) override; + void EmitMethodDesc(FuncEmitInfo &funcEmitInfo, Emitter &emitter) override; + void EmitFastLSDA(FuncEmitInfo &funcEmitInfo) override; + void EmitFullLSDA(FuncEmitInfo &funcEmitInfo) override; + void EmitBBHeaderLabel(FuncEmitInfo &funcEmitInfo, const std::string &name, LabelIdx labIdx) override; + void EmitJavaInsnAddr(FuncEmitInfo &funcEmitInfo) override; + void Run(FuncEmitInfo &funcEmitInfo) override; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_EMITTER_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_fixshortbranch.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_fixshortbranch.h new file mode 100644 index 0000000000000000000000000000000000000000..ca18119b3e712b8aeb81652c0a07cfa7c1c9f91d --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_fixshortbranch.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_FIXSHORTBRANCH_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_FIXSHORTBRANCH_H + +#include +#include "riscv64_cg.h" +#include "optimize_common.h" +#include "mir_builder.h" + +namespace maplebe { +class AArch64FixShortBranch { + public: + explicit AArch64FixShortBranch(CGFunc *cf) : cgFunc(cf) { + cg = cgFunc->GetCG(); + } + ~AArch64FixShortBranch() = default; + void FixShortBranches(); + + private: + CGFunc *cgFunc; + CG *cg; + bool DistanceCheck(const BB &bb, LabelIdx targLabIdx, uint32 targId); + void SetInsnId(); +}; /* class AArch64ShortBranch */ + +CGFUNCPHASE(CgFixShortBranch, "fixshortbranch") +} /* namespace maplebe */ +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_FIXSHORTBRANCH_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_fp_simd_regs.def b/src/mapleall/maple_be/include/cg/riscv64/riscv64_fp_simd_regs.def new file mode 100644 index 0000000000000000000000000000000000000000..c630b95c7775a55522b66f291463b7133a0007cd --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_fp_simd_regs.def @@ -0,0 +1,75 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. 
+ * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +/* + * ARM Compiler armasm User Guide version 6.6. + * http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473j/deb1353594352617.html + * (retrieved on 3/24/2017) + */ +/* + * ID, 128 bit vector prefix, followed by scalar prefixes + * scalar prefixes: 8-bit, 16-bit, 32-bit, 64-bit, 128-bit, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill + * (e.g., we use D0 when V0 contains a 64-bit scalar FP number (aka, double)) + */ +FP_SIMD_REG(0 , "V", "B", "H", "S", "D", "Q", true, false, true, false, false) +FP_SIMD_REG(1 , "V", "B", "H", "S", "D", "Q", true, false, true, false, false) +FP_SIMD_REG(2 , "V", "B", "H", "S", "D", "Q", true, false, true, false, false) +FP_SIMD_REG(3 , "V", "B", "H", "S", "D", "Q", true, false, true, false, false) +FP_SIMD_REG(4 , "V", "B", "H", "S", "D", "Q", true, false, true, false, false) +FP_SIMD_REG(5 , "V", "B", "H", "S", "D", "Q", true, false, true, false, false) +FP_SIMD_REG(6 , "V", "B", "H", "S", "D", "Q", true, false, true, false, false) +FP_SIMD_REG(7 , "V", "B", "H", "S", "D", "Q", true, false, true, false, false) +FP_SIMD_REG(8 , "V", "B", "H", "S", "D", "Q", true, true, false, false, false) +FP_SIMD_REG(9 , "V", "B", "H", "S", "D", "Q", true, true, false, false, false) +FP_SIMD_REG(10, "V", "B", "H", "S", "D", "Q", true, true, false, false, false) +FP_SIMD_REG(11, "V", "B", "H", "S", "D", "Q", true, true, false, false, false) +FP_SIMD_REG(12, "V", "B", "H", "S", "D", "Q", true, true, false, false, false) +FP_SIMD_REG(13, "V", "B", "H", "S", "D", "Q", true, true, false, false, false) +FP_SIMD_REG(14, "V", "B", "H", "S", "D", "Q", true, true, false, false, false) +FP_SIMD_REG(15, "V", "B", "H", "S", "D", "Q", true, true, false, false, false) +FP_SIMD_REG(16, "V", "B", "H", "S", "D", "Q", true, false, false, false, false) +FP_SIMD_REG(17, "V", "B", "H", "S", "D", "Q", true, false, false, false, false) +FP_SIMD_REG(18, "V", "B", "H", "S", "D", "Q", true, false, false, false, false) +FP_SIMD_REG(19, "V", "B", "H", "S", "D", "Q", true, false, false, false, false) +FP_SIMD_REG(20, "V", "B", "H", "S", "D", "Q", true, false, false, false, false) +FP_SIMD_REG(21, "V", "B", "H", "S", "D", "Q", true, false, false, false, false) +FP_SIMD_REG(22, "V", "B", "H", "S", "D", "Q", true, false, false, false, false) +FP_SIMD_REG(23, "V", "B", "H", "S", "D", "Q", true, false, false, false, false) +FP_SIMD_REG(24, "V", "B", "H", "S", "D", "Q", true, false, false, false, false) +FP_SIMD_REG(25, "V", "B", "H", "S", "D", "Q", true, false, false, false, false) +FP_SIMD_REG(26, "V", "B", "H", "S", "D", "Q", true, false, false, false, false) +FP_SIMD_REG(27, "V", "B", "H", "S", "D", "Q", true, false, false, false, false) +FP_SIMD_REG(28, "V", "B", "H", "S", "D", "Q", true, false, false, false, false) +FP_SIMD_REG(29, "V", "B", "H", "S", "D", "Q", true, false, false, false, true) +FP_SIMD_REG(30, "V", "B", "H", "S", "D", "Q", true, false, false, true, false) +FP_SIMD_REG(31, "V", "B", "H", "S", "D", "Q", true, false, false, true, false) + +/* Alias ID */ +FP_SIMD_REG_ALIAS(0) +FP_SIMD_REG_ALIAS(1) +FP_SIMD_REG_ALIAS(2) +FP_SIMD_REG_ALIAS(3) +FP_SIMD_REG_ALIAS(4) +FP_SIMD_REG_ALIAS(5) +FP_SIMD_REG_ALIAS(6) 
+FP_SIMD_REG_ALIAS(7) + +/* FP_SIMD_REG_ALIAS_64BIT_SCALAR(0) */ +/* FP_SIMD_REG_ALIAS_64BIT_SCALAR(1) */ +/* FP_SIMD_REG_ALIAS_64BIT_SCALAR(2) */ +/* FP_SIMD_REG_ALIAS_64BIT_SCALAR(3) */ +/* FP_SIMD_REG_ALIAS_64BIT_SCALAR(4) */ +/* FP_SIMD_REG_ALIAS_64BIT_SCALAR(5) */ +/* FP_SIMD_REG_ALIAS_64BIT_SCALAR(6) */ +/* FP_SIMD_REG_ALIAS_64BIT_SCALAR(7) */ \ No newline at end of file diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_global.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_global.h new file mode 100644 index 0000000000000000000000000000000000000000..7312096ff239765ea5be466b624eebe49f1b32bd --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_global.h @@ -0,0 +1,266 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_GLOBAL_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_GLOBAL_H + +#include "global.h" +#include "riscv64_operand.h" + +namespace maplebe { +using namespace maple; + +class AArch64GlobalOpt : public GlobalOpt { + public: + explicit AArch64GlobalOpt(CGFunc &func) : GlobalOpt(func) {} + ~AArch64GlobalOpt() override = default; + void Run() override; +}; + +class OptimizeManager { + public: + explicit OptimizeManager(CGFunc &cgFunc) : cgFunc(cgFunc) {} + ~OptimizeManager() = default; + template + void Optimize() { + OptimizePattern optPattern(cgFunc); + optPattern.Run(); + } + private: + CGFunc &cgFunc; +}; + +class OptimizePattern { + public: + explicit OptimizePattern(CGFunc &cgFunc) : cgFunc(cgFunc) {} + virtual ~OptimizePattern() = default; + virtual bool CheckCondition(Insn &insn) = 0; + virtual void Optimize(Insn &insn) = 0; + virtual void Run() = 0; + bool OpndDefByOne(Insn &insn, int32 useIdx) const; + bool OpndDefByZero(Insn &insn, int32 useIdx) const; + bool OpndDefByOneOrZero(Insn &insn, int32 useIdx) const; + void ReplaceAllUsedOpndWithNewOpnd(const InsnSet &useInsnSet, uint32 regNO, + Operand &newOpnd, bool updateInfo) const; + + static bool InsnDefOne(Insn &insn); + static bool InsnDefZero(Insn &insn); + static bool InsnDefOneOrZero(Insn &insn); + protected: + virtual void Init() = 0; + CGFunc &cgFunc; +}; + +/* + * Do Forward prop when insn is mov + * mov xx, x1 + * ... // BBs and x1 is live + * mOp yy, xx + * + * => + * mov x1, x1 + * ... 
// BBs and x1 is live + * mOp yy, x1 + */ +class ForwardPropPattern : public OptimizePattern { + public: + explicit ForwardPropPattern(CGFunc &cgFunc) : OptimizePattern(cgFunc) {} + ~ForwardPropPattern() override = default; + bool CheckCondition(Insn &insn) final; + void Optimize(Insn &insn) final; + void Run() final; + + protected: + void Init() final; + private: + InsnSet firstRegUseInsnSet; + std::set modifiedBB; +}; + +/* + * Do back propagate of vreg/preg when encount following insn: + * + * mov vreg/preg1, vreg2 + * + * back propagate reg1 to all vreg2's use points and def points, when all of them is in same bb + */ +class BackPropPattern : public OptimizePattern { + public: + explicit BackPropPattern(CGFunc &cgFunc) : OptimizePattern(cgFunc) {} + ~BackPropPattern() override = default; + bool CheckCondition(Insn &insn) final; + void Optimize(Insn &insn) final; + void Run() final; + + protected: + void Init() final; + + private: + bool CheckAndGetOpnd(Insn &insn); + bool DestOpndHasUseInsns(Insn &insn); + bool DestOpndLiveOutToEHSuccs(Insn &insn); + bool CheckSrcOpndDefAndUseInsns(Insn &insn); + bool CheckPredefineInsn(Insn &insn); + bool CheckRedefineInsn(Insn &insn); + RegOperand *firstRegOpnd = nullptr; + RegOperand *secondRegOpnd = nullptr; + uint32 firstRegNO = 0; + uint32 secondRegNO = 0; + InsnSet srcOpndUseInsnSet; + Insn *defInsnForSecondOpnd = nullptr; +}; + +/* + * when w0 has only one valid bit, these tranformation will be done + * cmp w0, #0 + * cset w1, NE --> mov w1, w0 + * + * cmp w0, #0 + * cset w1, EQ --> eor w1, w0, 1 + * + * cmp w0, #1 + * cset w1, NE --> eor w1, w0, 1 + * + * cmp w0, #1 + * cset w1, EQ --> mov w1, w0 + * + * cmp w0, #0 + * cset w0, NE -->null + * + * cmp w0, #1 + * cset w0, EQ -->null + * + * condition: + * 1. the first operand of cmp instruction must has only one valid bit + * 2. the second operand of cmp instruction must be 0 or 1 + * 3. flag register of cmp isntruction must not be used later + */ +class CmpCsetPattern : public OptimizePattern { + public: + explicit CmpCsetPattern(CGFunc &cgFunc) : OptimizePattern(cgFunc) {} + ~CmpCsetPattern() override = default; + bool CheckCondition(Insn &insn) final; + void Optimize(Insn &insn) final; + void Run() final; + + protected: + void Init() final; + + private: + Insn *nextInsn = nullptr; + int64 cmpConstVal = 0; + Operand *cmpFirstOpnd = nullptr; + Operand *cmpSecondOpnd = nullptr; + Operand *csetFirstOpnd = nullptr; +}; + +/* + * mov w5, #1 + * ... --> cset w5, NE + * mov w0, #0 + * csel w5, w5, w0, NE + * + * mov w5, #0 + * ... --> cset w5,EQ + * mov w0, #1 + * csel w5, w5, w0, NE + * + * condition: + * 1.all define points of w5 are defined by: mov w5, #1(#0) + * 2.all define points of w0 are defined by: mov w0, #0(#1) + * 3.w0 will not be used after: csel w5, w5, w0, NE(EQ) + */ +class CselPattern : public OptimizePattern { + public: + explicit CselPattern(CGFunc &cgFunc) : OptimizePattern(cgFunc) {} + ~CselPattern() override = default; + bool CheckCondition(Insn &insn) final; + void Optimize(Insn &insn) final; + void Run() final; + + protected: + void Init() final {} + + private: + AArch64CC_t GetInverseCondCode(const CondOperand &cond) const; +}; + +/* + * uxtb w0, w0 --> null + * uxth w0, w0 --> null + * + * condition: + * 1. validbits(w0)<=8,16,32 + * 2. the first operand is same as the second operand + * + * uxtb w0, w1 --> null + * uxth w0, w1 --> null + * + * condition: + * 1. validbits(w1)<=8,16,32 + * 2. 
the use points of w0 has only one define point, that is uxt w0, w1 + */ +class RedundantUxtPattern : public OptimizePattern { + public: + explicit RedundantUxtPattern(CGFunc &cgFunc) : OptimizePattern(cgFunc) {} + ~RedundantUxtPattern() override = default; + bool CheckCondition(Insn &insn) final; + void Optimize(Insn &insn) final; + void Run() final; + + protected: + void Init() final; + + private: + uint32 GetMaximumValidBit(Insn &insn, uint8 udIdx, InsnSet &insnChecked) const; + static uint32 GetInsnValidBit(Insn &insn); + InsnSet useInsnSet; + uint32 firstRegNO = 0; + Operand *secondOpnd = nullptr; +}; + +/* + * bl MCC_NewObj_flexible_cname bl MCC_NewObj_flexible_cname + * mov x21, x0 // [R203] + * str x0, [x29,#16] // local var: Reg0_R6340 [R203] --> str x0, [x29,#16] // local var: Reg0_R6340 [R203] + * ... (has call) ... (has call) + * mov x2, x21 // use of x21 ldr x2, [x29, #16] + * bl *** bl *** + */ +class LocalVarSaveInsnPattern : public OptimizePattern { + public: + explicit LocalVarSaveInsnPattern(CGFunc &cgFunc) : OptimizePattern(cgFunc) {} + ~LocalVarSaveInsnPattern() override = default; + bool CheckCondition(Insn &insn) final; + void Optimize(Insn &insn) final; + void Run() final; + + protected: + void Init() final; + + private: + bool CheckFirstInsn(Insn &firstInsn); + bool CheckSecondInsn(); + bool CheckAndGetUseInsn(Insn &firstInsn); + bool CheckLiveRange(Insn &firstInsn); + Operand *firstInsnSrcOpnd = nullptr; + Operand *firstInsnDestOpnd = nullptr; + Operand *secondInsnSrcOpnd = nullptr; + Operand *secondInsnDestOpnd = nullptr; + Insn *useInsn = nullptr; + Insn *secondInsn = nullptr; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_GLOBAL_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_ico.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_ico.h new file mode 100644 index 0000000000000000000000000000000000000000..8e543e831b868012c61ea6e2cfa00a23643c781f --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_ico.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ICO_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ICO_H +#include "ico.h" +#include "riscv64_isa.h" +#include "optimize_common.h" +#include "live.h" + +namespace maplebe { +class AArch64IfConversionOptimizer : public IfConversionOptimizer { + public: + AArch64IfConversionOptimizer(CGFunc &func, MemPool &memPool) : IfConversionOptimizer(func, memPool) {} + + ~AArch64IfConversionOptimizer() override = default; + void InitOptimizePatterns() override; +}; + +/* If-Then-Else pattern */ +class AArch64ICOPattern : public ICOPattern { + public: + explicit AArch64ICOPattern(CGFunc &func) : ICOPattern(func) {} + ~AArch64ICOPattern() override = default; + protected: + bool DoOpt(BB &cmpBB, BB *ifBB, BB *elseBB, BB &joinBB) override; + AArch64CC_t Encode(MOperator mOp, bool inverse) const; + Insn *BuildCondSet(const Insn &branch, RegOperand ®, bool inverse); + Insn *BuildCondSel(const Insn &branch, MOperator mOp, RegOperand &dst, RegOperand &src1, RegOperand &src2); + Insn *BuildCmpInsn(const Insn &condBr); + bool IsSetInsn(const Insn &insn, Operand *&dest, Operand *&src) const; + bool BuildCondMovInsn(BB &cmpBB, const BB &bb, const std::map &ifDestSrcMap, + const std::map &elseDestSrcMap, bool elseBBIsProcessed, + std::vector &generateInsn); + void GenerateInsnForImm(const Insn &branchInsn, Operand &ifDest, Operand &elseDest, RegOperand &destReg, + std::vector &generateInsn); + Operand *GetDestReg(const std::map &destSrcMap, const RegOperand &destReg) const; + void GenerateInsnForReg(const Insn &branchInsn, Operand &ifDest, Operand &elseDest, RegOperand &destReg, + std::vector &generateInsn); + RegOperand *GenerateRegAndTempInsn(Operand &dest, const RegOperand &destReg, std::vector &generateInsn); + bool CheckModifiedRegister(Insn &insn, std::map &destSrcMap, Operand &src, + Operand &dest) const; + bool CheckCondMoveBB(BB *bb, std::map &destSrcMap, + std::vector &destRegs, Operand *flagReg) const; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ICO_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_immediate.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_immediate.h new file mode 100644 index 0000000000000000000000000000000000000000..27075e4ee8432e09b1d115dd37eddea5e1796cdc --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_immediate.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_IMMEDIATE_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_IMMEDIATE_H + +#include "types_def.h" /* maple_ir/include/typedef.h */ +#include + +namespace maplebe { +bool IsBitmaskImmediate(maple::uint64 val, maple::uint32 bitLen); +bool IsMoveWidableImmediate(maple::uint64 val, maple::uint32 bitLen); +bool BetterUseMOVZ(maple::uint64 val); +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_IMMEDIATE_H */ \ No newline at end of file diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_insn.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_insn.h new file mode 100644 index 0000000000000000000000000000000000000000..8789795289d1bc1d140080047800249f9e476211 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_insn.h @@ -0,0 +1,199 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_INSN_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_INSN_H + +#include "riscv64_isa.h" +#include "insn.h" +#include "string_utils.h" +#include "riscv64_operand.h" +#include "common_utils.h" +namespace maplebe { +class AArch64Insn : public Insn { + public: + AArch64Insn(MemPool &memPool, MOperator mOp) : Insn(memPool, mOp) {} + + AArch64Insn(const AArch64Insn &originalInsn, MemPool &memPool) : Insn(memPool, originalInsn.mOp) { + InitWithOriginalInsn(originalInsn, *CG::GetCurCGFuncNoConst()->GetMemoryPool()); + } + + ~AArch64Insn() override = default; + + AArch64Insn &operator=(const AArch64Insn &p) = default; + + bool IsReturn() const override { + return mOp == MOP_xret; + } + + bool IsFixedInsn() const override { + return mOp == MOP_clinit || mOp == MOP_clinit_tail; + } + + bool IsComment() const override { + return mOp == MOP_comment; + } + + bool IsGoto() const override { + return mOp == MOP_xuncond; + } + + bool IsImmaterialInsn() const override { + return IsComment(); + } + + bool IsMachineInstruction() const override { + return (mOp > MOP_undef && mOp < MOP_comment); + } + + bool IsPseudoInstruction() const override { + return (mOp >= MOP_pseudo_param_def_x && mOp <= MOP_pseudo_eh_def_x); + } + + bool OpndIsDef(uint32 id) const override; + bool OpndIsUse(uint32 id) const override; + bool IsEffectiveCopy() const override { + return CopyOperands() >= 0; + } + + uint32 GetResultNum() const override; + uint32 GetOpndNum() const override; + Operand *GetResult(uint32 i) const override; + Operand *GetOpnd(uint32 i) const override; + Operand *GetMemOpnd() const override; + void SetOpnd(uint32 i, Operand &opnd) override; + void SetResult(uint32 index, Operand &res) override; + int32 CopyOperands() const override; + bool IsGlobal() const final { + return (mOp == MOP_xadrp || mOp == MOP_xadrpl12); + } + + bool IsDecoupleStaticOp() const final { + if (mOp == MOP_lazy_ldr_static) { + Operand *opnd1 = opnds[1]; + CHECK_FATAL(opnd1 != nullptr, "opnd1 is null!"); + auto *stImmOpnd = static_cast(opnd1); + return StringUtils::StartsWith(stImmOpnd->GetName(), 
namemangler::kDecoupleStaticValueStr); + } + return false; + } + + bool IsCall() const final; + bool IsTailCall() const final; + bool IsClinit() const final; + bool IsLazyLoad() const final; + bool IsAdrpLdr() const final; + bool IsArrayClassCache() const final; + bool HasLoop() const final; + bool IsSpecialIntrinsic() const final; + bool CanThrow() const final; + bool IsIndirectCall() const final { + return mOp == MOP_xblr; + } + + bool IsCallToFunctionThatNeverReturns() final; + bool MayThrow() final; + bool IsBranch() const final; + bool IsCondBranch() const final; + bool IsUnCondBranch() const final; + bool IsMove() const final; + bool IsLoad() const final; + bool IsLoadLabel() const final; + bool IsLoadStorePair() const; + bool IsStore() const final; + bool IsLoadPair() const final; + bool IsStorePair() const final; + bool IsLoadAddress() const final; + bool IsAtomic() const final; + bool IsYieldPoint() const override; + bool IsVolatile() const override; + bool IsFallthruCall() const final { + return (mOp == MOP_xblr || mOp == MOP_xbl); + } + bool IsMemAccessBar() const override; + bool IsMemAccess() const override; + + Operand *GetCallTargetOperand() const override { + ASSERT(IsCall(), "should be call"); + return &GetOperand(0); + } + uint32 GetAtomicNum() const override; + ListOperand *GetCallArgumentOperand() override { + ASSERT(IsCall(), "should be call"); + ASSERT(GetOperand(1).IsList(), "should be list"); + return &static_cast(GetOperand(1)); + } + + bool IsTargetInsn() const override { + return true; + } + + bool IsDMBInsn() const override; + + void Emit(const CG&, Emitter&) const override; + + void Dump() const override; + + bool Check() const override; + + bool IsDefinition() const override; + + bool IsDestRegAlsoSrcReg() const override; + + bool IsPartDef() const override; + + uint32 GetLatencyType() const override; + + bool CheckRefField(size_t opndIndex, bool isEmit) const; + + private: + void CheckOpnd(Operand &opnd, OpndProp &mopd) const; + void EmitClinit(const CG&, Emitter&) const; + void EmitAdrpLdr(const CG&, Emitter&) const; + void EmitLazyBindingRoutine(Emitter&) const; + void EmitClinitTail(Emitter&) const; + void EmitAdrpLabel(Emitter&) const; + void EmitLazyLoad(Emitter&) const; + void EmitLazyLoadStatic(Emitter&) const; + void EmitArrayClassCacheLoad(Emitter&) const; + void EmitCheckThrowPendingException(const CG&, Emitter&) const; + void EmitGetAndAddInt(Emitter &emitter) const; + void EmitGetAndSetInt(Emitter &emitter) const; + void EmitCompareAndSwapInt(Emitter &emitter) const; + void EmitStringIndexOf(Emitter &emitter) const; + void EmitCounter(const CG&, Emitter&) const; +}; + +class AArch64cleancallInsn : public AArch64Insn { + public: + AArch64cleancallInsn(MemPool &memPool, MOperator opc) + : AArch64Insn(memPool, opc), refSkipIndex(-1) {} + + AArch64cleancallInsn(const AArch64cleancallInsn &originalInsn, MemPool &memPool) + : AArch64Insn(originalInsn, memPool) { + refSkipIndex = originalInsn.refSkipIndex; + } + AArch64cleancallInsn &operator=(const AArch64cleancallInsn &p) = default; + ~AArch64cleancallInsn() override = default; + + void SetRefSkipIndex(int32 index) { + refSkipIndex = index; + } + + private: + int32 refSkipIndex; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_INSN_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_int_regs.def b/src/mapleall/maple_be/include/cg/riscv64/riscv64_int_regs.def new file mode 100644 index 
0000000000000000000000000000000000000000..b3043c624614250d1e8435775c09ef9ae77ba121 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_int_regs.def @@ -0,0 +1,76 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. + */ +/* + * ARM Compiler armasm User Guide version 6.6. + * http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473j/deb1353594352617.html + * (retrieved on 3/24/2017) + * + * $ 4.1 Registers in AArch64 state + * + * There is no register named W31 or X31. + * Depending on the instruction, register 31 is either the stack + * pointer or the zero register. When used as the stack pointer, + * you refer to it as "SP". When used as the zero register, you refer + * to it as WZR in a 32-bit context or XZR in a 64-bit context. + * The zero register returns 0 when read and discards data when + * written (e.g., when setting the status register for testing). + */ +/* ID, 32-bit prefix, 64-bit prefix, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill */ +INT_REG(0 , "W", "X", true, false, true, false, false) +INT_REG(1 , "W", "X", true, false, true, false, false) +INT_REG(2 , "W", "X", true, false, true, false, false) +INT_REG(3 , "W", "X", true, false, true, false, false) +INT_REG(4 , "W", "X", true, false, true, false, false) +INT_REG(5 , "W", "X", true, false, true, false, false) +INT_REG(6 , "W", "X", true, false, true, false, false) +INT_REG(7 , "W", "X", true, false, true, false, false) +INT_REG(8 , "W", "X", true, false, false, false, false) +INT_REG(9 , "W", "X", true, false, false, false, false) +INT_REG(10, "W", "X", true, false, false, false, false) +INT_REG(11, "W", "X", true, false, false, false, false) +INT_REG(12, "W", "X", true, false, false, false, false) +INT_REG(13, "W", "X", true, false, false, false, false) +INT_REG(14, "W", "X", true, false, false, false, false) +INT_REG(15, "W", "X", true, false, false, false, true) +INT_REG(16, "W", "X", true, false, false, true, false) +INT_REG(17, "W", "X", true, false, false, true, false) +INT_REG(18, "W", "X", true, false, false, false, false) +INT_REG(19, "W", "X", true, true, false, false, false) +INT_REG(20, "W", "X", true, true, false, false, false) +INT_REG(21, "W", "X", true, true, false, false, false) +INT_REG(22, "W", "X", true, true, false, false, false) +INT_REG(23, "W", "X", true, true, false, false, false) +INT_REG(24, "W", "X", true, true, false, false, false) +INT_REG(25, "W", "X", true, true, false, false, false) +INT_REG(26, "W", "X", true, true, false, false, false) +INT_REG(27, "W", "X", true, true, false, false, false) +INT_REG(28, "W", "X", true, true, false, false, false) +INT_REG(29, "W", "X", false, true, false, false, false) +INT_REG(30, "W", "X", false, true, false, false, false) +/* + * Refer to ARM Compiler armasm User Guide version 6.6. $4.5 Predeclared core register names in AArch64 state + * We should not use "W" prefix in 64-bit context, though!! 
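+ * There is no "XSP" spelling: in a 64-bit context register 31 is written as plain SP
+ * (or XZR when used as the zero register), which is why the SP row below leaves its
+ * 64-bit prefix empty while the ZR row keeps the "X" prefix.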
+ */ +INT_REG(SP, "W", "" , false, false, false, false, false) +INT_REG(ZR, "W", "X", false, false, false, false, false) + +/* Alias ID, ID, 32-bit prefix, 64-bit prefix */ +INT_REG_ALIAS(FP, 29, "", "" ) +INT_REG_ALIAS(LR, 30, "", "" ) + +/* R19 is reserved for yieldpoint */ +INT_REG_ALIAS(YP, 19, "", "" ) + +INT_REG_ALIAS(LAST_INT_REG, 30, "", "" ) diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_isa.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_isa.h new file mode 100644 index 0000000000000000000000000000000000000000..a4f950c06dbed79cec7e1cc7c2820cef4f63dec6 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_isa.h @@ -0,0 +1,385 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ISA_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ISA_H + +#include "operand.h" +#include "mad.h" + +#define DEFINE_MOP(op, ...) op, +enum AArch64MOP_t : maple::uint32 { +#include "riscv64_md.def" + kMopLast +}; +#undef DEFINE_MOP + +namespace maplebe { +/* + * ARM Architecture Reference Manual (for ARMv8) + * D1.8.2 + */ +constexpr int kAarch64StackPtrAlignment = 16; + +constexpr int32 kOffsetAlign = 8; +constexpr uint32 kIntregBytelen = 8; /* 64-bit */ +constexpr uint32 kFpregBytelen = 8; /* only lower 64 bits are used */ +constexpr int kSizeOfFplr = 16; + +enum StpLdpImmBound : int { + kStpLdpImm64LowerBound = -512, + kStpLdpImm64UpperBound = 504, + kStpLdpImm32LowerBound = -256, + kStpLdpImm32UpperBound = 252 +}; + +enum StrLdrPerPostBound : int64 { + kStrLdrPerPostLowerBound = -256, + kStrLdrPerPostUpperBound = 255 +}; +constexpr int64 kStrAllLdrAllImmLowerBound = 0; +enum StrLdrImmUpperBound : int64 { + kStrLdrImm32UpperBound = 16380, /* must be a multiple of 4 */ + kStrLdrImm64UpperBound = 32760, /* must be a multiple of 8 */ + kStrbLdrbImmUpperBound = 4095, + kStrhLdrhImmUpperBound = 8190 +}; + +/* AArch64 Condition Code Suffixes */ +enum AArch64CC_t { +#define CONDCODE(a) CC_##a, +#include "riscv64_cc.def" +#undef CONDCODE + kCcLast +}; + +/* + * ARM Compiler armasm User Guide version 6.6. + * http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473j/deb1353594352617.html + * (retrieved on 3/24/2017) + * + * $ 4.1 Registers in AArch64 state + * ...When you use the 32-bit form of an instruction, the upper + * 32 bits of the source registers are ignored and + * the upper 32 bits of the destination register are set to zero. + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * + * There is no register named W31 or X31. + * Depending on the instruction, register 31 is either the stack + * pointer or the zero register. When used as the stack pointer, + * you refer to it as "SP". When used as the zero register, you refer + * to it as WZR in a 32-bit context or XZR in a 64-bit context. + * The zero register returns 0 when read and discards data when + * written (e.g., when setting the status register for testing). 
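+ * For example, a 32-bit move such as "mov w0, #0xffffffff" leaves x0 reading back as
+ * 0x00000000ffffffff, so code that wants the signed 64-bit value -1 must follow it
+ * with an explicit sxtw rather than relying on the upper half.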
+ */ +enum AArch64reg : uint32 { + kRinvalid = kInvalidRegNO, +/* integer registers */ +#define INT_REG(ID, PREF32, PREF64, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill) R##ID, +#define INT_REG_ALIAS(ALIAS, ID, PREF32, PREF64) +#include "riscv64_int_regs.def" +#undef INT_REG +#undef INT_REG_ALIAS +/* fp-simd registers */ +#define FP_SIMD_REG(ID, PV, P8, P16, P32, P64, P128, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill) V##ID, +#define FP_SIMD_REG_ALIAS(ID) +#include "riscv64_fp_simd_regs.def" +#undef FP_SIMD_REG +#undef FP_SIMD_REG_ALIAS + kMaxRegNum, + kRFLAG, + kAllRegNum, +/* alias */ +#define INT_REG(ID, PREF32, PREF64, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill) +#define INT_REG_ALIAS(ALIAS, ID, PREF32, PREF64) R##ALIAS = R##ID, +#include "riscv64_int_regs.def" +#undef INT_REG +#undef INT_REG_ALIAS +#define FP_SIMD_REG(ID, PV, P8, P16, P32, P64, P128, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill) +#define FP_SIMD_REG_ALIAS(ID) S##ID = V##ID, +#include "riscv64_fp_simd_regs.def" +#undef FP_SIMD_REG +#undef FP_SIMD_REG_ALIAS +#define FP_SIMD_REG(ID, PV, P8, P16, P32, P64, P128, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill) +#define FP_SIMD_REG_ALIAS(ID) D##ID = V##ID, +#include "riscv64_fp_simd_regs.def" +#undef FP_SIMD_REG +#undef FP_SIMD_REG_ALIAS + kNArmRegisters +}; + +namespace AArch64isa { +static inline bool IsGPRegister(AArch64reg r) { + return R0 <= r && r <= RZR; +} + +static inline bool IsFPSIMDRegister(AArch64reg r) { + return V0 <= r && r <= V31; +} + +static inline bool IsPhysicalRegister(regno_t r) { + return r < kMaxRegNum; +} + +static inline RegType GetRegType(AArch64reg r) { + if (IsGPRegister(r)) { + return kRegTyInt; + } + if (IsFPSIMDRegister(r)) { + return kRegTyFloat; + } + ASSERT(false, "No suitable register type to return?"); + return kRegTyUndef; +} + +enum MemoryOrdering : uint32 { + kMoNone = 0, + kMoAcquire = (1ULL << 0), /* ARMv8 */ + kMoAcquireRcpc = (1ULL << 1), /* ARMv8.3 */ + kMoLoacquire = (1ULL << 2), /* ARMv8.1 */ + kMoRelease = (1ULL << 3), /* ARMv8 */ + kMoLorelease = (1ULL << 4) /* ARMv8.1 */ +}; +} /* namespace AArch64isa */ + +enum RegPropState : uint32 { + kRegPropUndef = 0, + kRegPropDef = 0x1, + kRegPropUse = 0x2 +}; +enum RegAddress : uint32 { + kRegHigh = 0x4, + kRegLow = 0x8 +}; +constexpr uint32 kMemLow12 = 0x10; +constexpr uint32 kLiteralLow12 = kMemLow12; +constexpr uint32 kPreInc = 0x20; +constexpr uint32 kPostInc = 0x40; +constexpr uint32 kLoadLiteral = 0x80; + +class RegProp { + public: + RegProp(RegType t, AArch64reg r, uint32 d) : regType(t), physicalReg(r), defUse(d) {} + virtual ~RegProp() = default; + const RegType &GetRegType() const { + return regType; + } + const AArch64reg &GetPhysicalReg() const { + return physicalReg; + } + uint32 GetDefUse() const { + return defUse; + } + private: + RegType regType; + AArch64reg physicalReg; + uint32 defUse; /* used for register use/define and other properties of other operand */ +}; + +class AArch64OpndProp : public OpndProp { + public: + AArch64OpndProp(Operand::OperandType t, RegProp p, uint8 s) : opndType(t), regProp(p), size(s) {} + virtual ~AArch64OpndProp() = default; + Operand::OperandType GetOperandType() { + return opndType; + } + + const RegProp &GetRegProp() const { + return regProp; + } + + bool IsPhysicalRegister() const { + return opndType == Operand::kOpdRegister && regProp.GetPhysicalReg() < kMaxRegNum; + } + + bool IsRegister() const { + return opndType == Operand::kOpdRegister; + } + + bool 
IsRegDef() const { + return opndType == Operand::kOpdRegister && (regProp.GetDefUse() & kRegPropDef); + } + + bool IsRegUse() const { + return opndType == Operand::kOpdRegister && (regProp.GetDefUse() & kRegPropUse); + } + + bool IsMemLow12() const { + return opndType == Operand::kOpdMem && (regProp.GetDefUse() & kMemLow12); + } + + bool IsLiteralLow12() const { + return opndType == Operand::kOpdStImmediate && (regProp.GetDefUse() & kLiteralLow12); + } + + bool IsDef() const { + return regProp.GetDefUse() & kRegPropDef; + } + + bool IsUse() const { + return regProp.GetDefUse() & kRegPropUse; + } + + bool IsLoadLiteral() const { + return regProp.GetDefUse() & kLoadLiteral; + } + + uint8 GetSize() const { + return size; + } + + uint32 GetOperandSize() const { + return static_cast(size); + } + + private: + Operand::OperandType opndType; + RegProp regProp; + uint8 size; +}; + +struct AArch64MD { + MOperator opc; + std::vector operand; + uint64 properties; + LatencyType latencyType; + const std::string &name; + const std::string &format; + uint32 atomicNum; /* indicate how many asm instructions it will emit. */ + + bool UseSpecReg() const { + return properties & USESPECREG; + } + + uint32 GetAtomicNum() const { + return atomicNum; + } + + bool IsCall() const { + return properties & ISCALL; + } + + bool HasLoop() const { + return properties & HASLOOP; + } + + bool CanThrow() const { + return properties & CANTHROW; + } + + AArch64OpndProp *GetOperand(int nth) const { + ASSERT(nth < operand.size(), "index of Operand should not be bigger than MaxOperandNum"); + return static_cast(operand[nth]); + } + + uint32 GetOperandSize() const { + if (properties & (ISLOAD | ISSTORE)) { + /* use memory operand */ + return GetOperand(1)->GetOperandSize(); + } + /* use dest operand */ + return GetOperand(0)->GetOperandSize(); + } + + bool Is64Bit() const { + return GetOperandSize() == k64BitSize; + } + + bool IsVolatile() const { + return ((properties & HASRELEASE) != 0) || ((properties & HASACQUIRE) != 0); + } + + bool IsMemAccessBar() const { + return (properties & (HASRELEASE | HASACQUIRE | HASACQUIRERCPC | HASLOACQUIRE | HASLORELEASE)) != 0; + } + + bool IsMemAccess() const { + return (properties & (ISLOAD | ISSTORE | ISLOADPAIR | ISSTOREPAIR)) != 0; + } + + bool IsBranch() const { + return (properties & (ISCONDBRANCH | ISUNCONDBRANCH)) != 0; + } + + bool IsCondBranch() const { + return (properties & (ISCONDBRANCH)) != 0; + } + + bool IsUnCondBranch() const { + return (properties & (ISUNCONDBRANCH)) != 0; + } + + bool IsMove() const { + return (properties & (ISMOVE)) != 0; + } + + bool IsDMB() const { + return (properties & (ISDMB)) != 0; + } + + bool IsLoad() const { + return (properties & (ISLOAD)) != 0; + } + + bool IsStore() const { + return (properties & (ISSTORE)) != 0; + } + + bool IsLoadPair() const { + return (properties & (ISLOADPAIR)) != 0; + } + + bool IsStorePair() const { + return (properties & (ISSTOREPAIR)) != 0; + } + + bool IsLoadStorePair() const { + return (properties & (ISLOADPAIR | ISSTOREPAIR)) != 0; + } + + bool IsLoadAddress() const { + return (properties & (ISLOADADDR)) != 0; + } + + bool IsAtomic() const { + return (properties & ISATOMIC) != 0; + } + + bool IsCondDef() const { + return properties & ISCONDDEF; + } + + bool IsPartDef() const { + return properties & ISPARTDEF; + } + + LatencyType GetLatencyType() const { + return latencyType; + } +}; + +/* + * We save callee-saved registers from lower stack area to upper stack area. 
+ * If possible, we store a pair of registers (int/int and fp/fp) in the stack. + * The Stack Pointer has to be aligned at 16-byte boundary. + * On AArch64, kIntregBytelen == 8 (see the above) + */ +inline void GetNextOffsetCalleeSaved(int &offset) { + offset += (kIntregBytelen << 1); +} + +MOperator GetMopPair(MOperator mop); +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ISA_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_live.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_live.h new file mode 100644 index 0000000000000000000000000000000000000000..908c1e3c322c2ed769b261749fcbdf2ce863b52c --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_live.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_LIVE_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_LIVE_H + +#include "live.h" + +namespace maplebe { +class AArch64LiveAnalysis : public LiveAnalysis { + public: + AArch64LiveAnalysis(CGFunc &func, MemPool &memPool) : LiveAnalysis(func, memPool) {} + ~AArch64LiveAnalysis() override = default; + void GetBBDefUse(BB &bb) override; + bool CleanupBBIgnoreReg(uint32 reg) override; + void InitEhDefine(BB &bb) override; + private: + void CollectLiveInfo(BB &bb, const Operand &opnd, bool isDef, bool isUse) const; + void GenerateReturnBBDefUse(BB &bb) const; + void ProcessCallInsnParam(BB &bb) const; + void ProcessListOpnd(BB &bb, Operand &opnd) const; + void ProcessMemOpnd(BB &bb, Operand &opnd) const; + void ProcessCondOpnd(BB &bb) const; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_LIVE_H */ \ No newline at end of file diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_lsra.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_lsra.h new file mode 100644 index 0000000000000000000000000000000000000000..a7ea5b7a3a879918af9a824b1c2c7d027ab37302 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_lsra.h @@ -0,0 +1,529 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_LSRA_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_LSRA_H +#include "riscv64_reg_alloc.h" +#include "riscv64_operand.h" +#include "riscv64_insn.h" +#include "riscv64_abi.h" + +namespace maplebe { +class LSRALinearScanRegAllocator : public AArch64RegAllocator { + enum RegInCatch : uint8 { + /* + * RA do not want to allocate certain registers if a live interval is + * only inside of catch blocks. + */ + kRegCatchNotInit = 0, /* unitialized state */ + kRegNOtInCatch = 1, /* interval is part or all outside of catch */ + kRegAllInCatch = 2, /* inteval is completely inside catch */ + }; + + enum RegInCleanup : uint8 { + /* Similar to reg_in_catch_t */ + kRegCleanupNotInit = 0, /* unitialized state */ + kRegAllInFirstbb = 1, /* interval is all in the first bb */ + kRegAllOutCleanup = 2, /* interval is all outside of cleanup, must in normal bb, may in first bb. */ + kRegInCleanupAndFirstbb = 3, /* inteval is in cleanup and first bb. */ + kRegInCleanupAndNormalbb = 4, /* inteval is in cleanup and non-first bb. */ + kRegAllInCleanup = 5 /* inteval is inside cleanup, except for bb 1 */ + }; + + class LiveInterval { + public: + explicit LiveInterval(MapleAllocator &mallocator) + : ranges(mallocator.Adapter()), + holes(mallocator.Adapter()), + usePositions(mallocator.Adapter()) {} + + virtual ~LiveInterval() = default; + + void AddRange(uint32 from, uint32 to); + void AddUsePos(uint32 pos); + + const Insn *GetIsCall() const { + return isCall; + } + + void SetIsCall(Insn &newIsCall) { + isCall = &newIsCall; + } + + uint32 GetPhysUse() const { + return physUse; + } + + void SetPhysUse(uint32 newPhysUse) { + physUse = newPhysUse; + } + + uint32 GetLastUse() const { + return lastUse; + } + + void SetLastUse(uint32 newLastUse) { + lastUse = newLastUse; + } + + uint32 GetRegNO() const { + return regNO; + } + + void SetRegNO(uint32 newRegNO) { + regNO = newRegNO; + } + + uint32 GetAssignedReg() const { + return assignedReg; + } + + void SetAssignedReg(uint32 newAssignedReg) { + assignedReg = newAssignedReg; + } + + uint32 GetFirstDef() const { + return firstDef; + } + + void SetFirstDef(uint32 newFirstDef) { + firstDef = newFirstDef; + } + + uint32 GetStackSlot() const { + return stackSlot; + } + + void SetStackSlot(uint32 newStkSlot) { + stackSlot = newStkSlot; + } + + RegType GetRegType() const { + return regType; + } + + void SetRegType(RegType newRegType) { + regType = newRegType; + } + + uint32 GetFirstAcrossedCall() const { + return firstAcrossedCall; + } + + void SetFirstAcrossedCall(uint32 newFirstAcrossedCall) { + firstAcrossedCall = newFirstAcrossedCall; + } + + bool IsEndByCall() const { + return endByCall; + } + + bool IsUseBeforeDef() const { + return useBeforeDef; + } + + void SetUseBeforeDef(bool newUseBeforeDef) { + useBeforeDef = newUseBeforeDef; + } + + bool IsShouldSave() const { + return shouldSave; + } + + void SetShouldSave(bool newShouldSave) { + shouldSave = newShouldSave; + } + + bool IsMultiUseInBB() const { + return multiUseInBB; + } + + void SetMultiUseInBB(bool newMultiUseInBB) { + multiUseInBB = newMultiUseInBB; + } + + bool IsThrowVal() const { + return isThrowVal; + } + + bool IsCallerSpilled() const { + return isCallerSpilled; + } + + void SetIsCallerSpilled(bool newIsCallerSpilled) { + isCallerSpilled = newIsCallerSpilled; + } + + bool IsMustAllocate() const { + return mustAllocate; + } + + void SetMustAllocate(bool newMustAllocate) { + mustAllocate = newMustAllocate; + } + + uint32 GetRefCount() const{ + return refCount; + } + 
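+    /*
+     * Illustrative note on the range bookkeeping below: ranges holds the
+     * instruction-number spans in which this vreg is live (filled in by AddRange),
+     * and holes holds the gaps between those spans.  Roughly, ranges {4,10} and
+     * {20,28} leave the hole {10,20}; a second interval that fits entirely inside
+     * such a hole can be linked to this one via liveParent/liveChild and placed
+     * there by FillInHole.
+     */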
+ void SetRefCount(uint32 newRefCount) { + refCount = newRefCount; + } + + float GetPriority() const { + return priority; + } + + void SetPriority(float newPriority) { + priority = newPriority; + } + + const MapleVector> &GetRanges() const { + return ranges; + } + + MapleVector> &GetRanges() { + return ranges; + } + + size_t GetRangesSize () const { + return ranges.size(); + } + + const MapleVector> &GetHoles() const { + return holes; + } + + void HolesPushBack(uint32 pair1, uint32 pair2) { + holes.push_back(std::pair(pair1, pair2)); + } + + void UsePositionsInsert(uint32 insertId) { + (void)usePositions.insert(insertId); + } + + const LiveInterval *GetLiParent() const { + return liveParent; + } + + void SetLiParent(LiveInterval *newLiParent) { + liveParent = newLiParent; + } + + void SetLiParentChild(LiveInterval *child) { + liveParent->SetLiChild(child); + } + + const LiveInterval *GetLiChild() const { + return liveChild; + } + + void SetLiChild(LiveInterval *newLiChild) { + liveChild = newLiChild; + } + + uint32 GetResultCount() const { + return resultCount; + } + + void SetResultCount(uint32 newResultCount) { + resultCount = newResultCount; + } + + void SetInCatchState() { + /* + * Once in REG_NOT_IN_CATCH, it is irreversible since once an interval + * is not in a catch, it is not completely in a catch. + */ + if (inCatchState == kRegNOtInCatch) { + return; + } + inCatchState = kRegAllInCatch; + } + + void SetNotInCatchState() { + inCatchState = kRegNOtInCatch; + } + + bool IsInCatch() const { + return (inCatchState == kRegAllInCatch); + } + + void SetInCleanupState() { + switch (inCleanUpState) { + case kRegCleanupNotInit: + inCleanUpState = kRegAllInCleanup; + break; + case kRegAllInFirstbb: + inCleanUpState = kRegInCleanupAndFirstbb; + break; + case kRegAllOutCleanup: + inCleanUpState = kRegInCleanupAndNormalbb; + break; + case kRegInCleanupAndFirstbb: + break; + case kRegInCleanupAndNormalbb: + break; + case kRegAllInCleanup: + break; + default: + ASSERT(false, "CG Internal error."); + break; + } + } + + void SetNotInCleanupState(bool isFirstBB) { + switch (inCleanUpState) { + case kRegCleanupNotInit: { + if (isFirstBB) { + inCleanUpState = kRegAllInFirstbb; + } else { + inCleanUpState = kRegAllOutCleanup; + } + break; + } + case kRegAllInFirstbb: { + if (!isFirstBB) { + inCleanUpState = kRegAllOutCleanup; + } + break; + } + case kRegAllOutCleanup: + break; + case kRegInCleanupAndFirstbb: { + if (!isFirstBB) { + inCleanUpState = kRegInCleanupAndNormalbb; + } + break; + } + case kRegInCleanupAndNormalbb: + break; + case kRegAllInCleanup: { + if (isFirstBB) { + inCleanUpState = kRegInCleanupAndFirstbb; + } else { + inCleanUpState = kRegInCleanupAndNormalbb; + } + break; + } + default: + ASSERT(false, "CG Internal error."); + break; + } + } + + bool IsAllInCleanupOrFirstBB() const { + return (inCleanUpState == kRegAllInCleanup) || (inCleanUpState == kRegInCleanupAndFirstbb); + } + + bool IsAllOutCleanup() const { + return (inCleanUpState == kRegAllInFirstbb) || (inCleanUpState == kRegAllOutCleanup); + } + + private: + Insn *isCall = nullptr; + uint32 firstDef = 0; + uint32 lastUse = 0; + uint32 physUse = 0; + uint32 regNO = 0; + /* physical register, using cg defined reg based on R0/V0. 
*/ + uint32 assignedReg = 0; + uint32 stackSlot = -1; + RegType regType = kRegTyUndef; + uint32 firstAcrossedCall = 0; + bool endByCall = false; + bool useBeforeDef = false; + bool shouldSave = false; + bool multiUseInBB = false; /* vreg has more than 1 use in bb */ + bool isThrowVal = false; + bool isCallerSpilled = false; /* only for R0(R1?) which are used for explicit incoming value of throwval; */ + bool mustAllocate = false; /* The register cannot be spilled (clinit pair) */ + uint32 refCount = 0; + float priority = 0.0; + MapleVector> ranges; + MapleVector> holes; + MapleSet usePositions; + LiveInterval *liveParent = nullptr; /* Current li is in aother li's hole. */ + LiveInterval *liveChild = nullptr; /* Another li is in current li's hole. */ + uint32 resultCount = 0; /* number of times this vreg has been written */ + uint8 inCatchState = kRegCatchNotInit; /* part or all of live interval is outside of catch blocks */ + uint8 inCleanUpState = kRegCleanupNotInit; /* part or all of live interval is outside of cleanup blocks */ + }; + + struct ActiveCmp { + bool operator()(const LiveInterval *lhs, const LiveInterval *rhs) const { + CHECK_NULL_FATAL(lhs); + CHECK_NULL_FATAL(rhs); + /* elements considered equal if return false */ + if (lhs == rhs) { + return false; + } + if (lhs->GetFirstDef() == rhs->GetFirstDef() && lhs->GetLastUse() == rhs->GetLastUse() && + lhs->GetRegNO() == rhs->GetRegNO() && lhs->GetRegType() == rhs->GetRegType() && + lhs->GetAssignedReg() == rhs->GetAssignedReg()) { + return false; + } + if (lhs->GetPhysUse() != 0 && rhs->GetPhysUse() != 0) { + if (lhs->GetFirstDef() == rhs->GetFirstDef()) { + return lhs->GetPhysUse() < rhs->GetPhysUse(); + } else { + return lhs->GetFirstDef() < rhs->GetFirstDef(); + } + } + /* At this point, lhs != rhs */ + if (lhs->GetLastUse() == rhs->GetLastUse()) { + return lhs->GetFirstDef() <= rhs->GetFirstDef(); + } + return lhs->GetLastUse() < rhs->GetLastUse(); + } + }; + + public: + LSRALinearScanRegAllocator(CGFunc &cgFunc, MemPool &memPool) + : AArch64RegAllocator(cgFunc, memPool), + liveIntervalsArray(alloc.Adapter()), + lastIntParamLi(alloc.Adapter()), + lastFpParamLi(alloc.Adapter()), + initialQue(alloc.Adapter()), + intParamQueue(alloc.Adapter()), + fpParamQueue(alloc.Adapter()), + callList(alloc.Adapter()), + active(alloc.Adapter()), + intCallerRegSet(alloc.Adapter()), + intCalleeRegSet(alloc.Adapter()), + intParamRegSet(alloc.Adapter()), + intSpillRegSet(alloc.Adapter()), + fpCallerRegSet(alloc.Adapter()), + fpCalleeRegSet(alloc.Adapter()), + fpParamRegSet(alloc.Adapter()), + fpSpillRegSet(alloc.Adapter()), + calleeUseCnt(alloc.Adapter()) { + for (int32 i = 0; i < AArch64Abi::kNumIntParmRegs; ++i) { + intParamQueue.push_back(initialQue); + fpParamQueue.push_back(initialQue); + } + } + ~LSRALinearScanRegAllocator() override = default; + + bool AllocateRegisters() override; + bool CheckForReg(Operand &opnd, Insn &insn, LiveInterval &li, regno_t regNO, bool isDef) const; + void PrintRegSet(const MapleSet &set, const std::string &str) const; + void PrintLiveInterval(LiveInterval &li, const std::string &str) const; + void PrintLiveRanges() const; + void PrintParamQueue(const std::string &str); + void PrintCallQueue(const std::string &str) const; + void PrintActiveList(const std::string &str, uint32 len = 0) const; + void PrintActiveListSimple() const; + void PrintLiveIntervals() const; + void DebugCheckActiveList() const; + void InitFreeRegPool(); + void RecordCall(Insn &insn); + void RecordPhysRegs(const RegOperand ®Opnd, uint32 
insnNum, bool isDef); + void UpdateLiveIntervalState(const BB &bb, LiveInterval &li); + void SetupLiveInterval(Operand &opnd, Insn &insn, bool isDef, uint32 &nUses); + void UpdateLiveIntervalByLiveIn(const BB &bb, uint32 insnNum); + void UpdateParamLiveIntervalByLiveIn(const BB &bb, uint32 insnNum); + void ComputeLiveIn(BB &bb, uint32 insnNum); + void ComputeLiveOut(BB &bb, uint32 insnNum); + void ComputeLiveIntervalForEachOperand(Insn &insn); + void ComputeLiveInterval(); + bool PropagateRenameReg(Insn &insn, uint32 replaceReg, Operand &renameOperand); + void PropagateX0(); + void FindLowestPrioInActive(LiveInterval *&li, RegType regType = kRegTyInt, bool startRa = false); + void LiveIntervalAnalysis(); + bool OpndNeedAllocation(Insn &insn, Operand &opnd, bool isDef, uint32 insnNum); + void InsertParamToActive(Operand &opnd); + void InsertToActive(Operand &opnd, uint32 insnNum); + void ReturnPregToSet(LiveInterval &li, uint32 preg); + void ReleasePregToSet(LiveInterval &li, uint32 preg); + void UpdateActiveAtRetirement(uint32 insnID); + void RetireFromActive(const Insn &insn); + void AssignPhysRegsForInsn(Insn &insn); + RegOperand *GetReplaceOpnd(Insn &insn, Operand &opnd, uint32 &spillIdx, bool isDef); + void SetAllocMode(); + void CheckSpillCallee(); + void LinearScanRegAllocator(); + void FinalizeRegisters(); + void SpillOperand(Insn &insn, Operand &opnd, bool isDef, uint32 spillIdx); + void SetOperandSpill(Operand &opnd); + RegOperand *HandleSpillForInsn(Insn &insn, Operand &opnd); + MemOperand *GetSpillMem(uint32 vregNO, bool isDest, Insn &insn, AArch64reg regNO, bool &isOutOfRange); + void InsertCallerSave(Insn &insn, Operand &opnd, bool isDef); + uint32 GetRegFromSet(MapleSet &set, regno_t offset, LiveInterval &li, regno_t forcedReg = 0); + uint32 AssignSpecialPhysRegPattern(Insn &insn, LiveInterval &li); + uint32 FindAvailablePhyReg(LiveInterval &li, Insn &insn); + RegOperand *AssignPhysRegs(Operand &opnd, Insn &insn); + void SetupIntervalRangesByOperand(Operand &opnd, const Insn &insn, uint32 blockFrom, bool isDef, bool isUse); + void BuildIntervalRangesForEachOperand(const Insn &insn, uint32 blockFrom); + void BuildIntervalRanges(); + uint32 FillInHole(LiveInterval &li); + + private: + uint32 FindAvailablePhyRegByFastAlloc(LiveInterval &li); + bool NeedSaveAcrossCall(LiveInterval &li); + uint32 FindAvailablePhyReg(LiveInterval &li, Insn &insn, bool isIntReg); + + /* Comparison function for LiveInterval */ + static constexpr uint32 kMaxSpillRegNum = 3; + static constexpr uint32 kMaxFpSpill = 2; + MapleVector liveIntervalsArray; + MapleVector lastIntParamLi; + MapleVector lastFpParamLi; + MapleQueue initialQue; + using SingleQue = MapleQueue; + MapleVector intParamQueue; + MapleVector fpParamQueue; + MapleList callList; + MapleSet active; + MapleSet::iterator itFinded; + + /* Change these into vectors so it can be added and deleted easily. 
*/ + MapleSet intCallerRegSet; /* integer caller saved */ + MapleSet intCalleeRegSet; /* callee */ + MapleSet intParamRegSet; /* parameter */ + MapleVector intSpillRegSet; /* integer regs put aside for spills */ + /* and register */ + uint32 intCallerMask = 0; /* bit mask for all possible caller int */ + uint32 intCalleeMask = 0; /* callee */ + uint32 intParamMask = 0; /* (physical-register) parameter */ + MapleSet fpCallerRegSet; /* float caller saved */ + MapleSet fpCalleeRegSet; /* callee */ + MapleSet fpParamRegSet; /* parameter */ + MapleVector fpSpillRegSet; /* float regs put aside for spills */ + MapleVector calleeUseCnt; /* Number of time callee reg is seen */ + uint32 fpCallerMask = 0; /* bit mask for all possible caller fp */ + uint32 fpCalleeMask = 0; /* callee */ + uint32 fpParamMask = 0; /* (physical-register) parameter */ + uint32 intBBDefMask = 0; /* locally which physical reg is defined */ + uint32 fpBBDefMask = 0; + uint32 debugSpillCnt = 0; + uint32 regUsedInBBSz = 0; + uint64 *regUsedInBB = nullptr; + uint32 maxInsnNum = 0; + regno_t minVregNum = 0; + regno_t maxVregNum = 0xFFFFFFFF; + bool fastAlloc = false; + bool spillAll = false; + bool needExtraSpillReg = false; + bool isSpillZero = false; + bool shouldOptIntCallee = false; + bool shouldOptFpCallee = false; + uint64 spillCount = 0; + uint64 reloadCount = 0; + uint64 callerSaveSpillCount = 0; + uint64 callerSaveReloadCount = 0; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_LSRA_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_md.def b/src/mapleall/maple_be/include/cg/riscv64/riscv64_md.def new file mode 100644 index 0000000000000000000000000000000000000000..f09cc5a6ab6bc8b0c53a452f3eef805719e45924 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_md.def @@ -0,0 +1,865 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
+ */ +/* {mop, opnds, prop, latency, name, format, length} */ +/* MOP_undef, */ +DEFINE_MOP(MOP_undef, {},0,kLtUndef,"","",0) + +/* # Definitions */ + +/* AARCH64 MOVES */ +/* MOP_xmovrr */ +DEFINE_MOP(MOP_xmovrr, {mopdReg64ID,mopdReg64IS},ISMOVE,kLtAlu,"mov","0,1",1) +/* MOP_wmovrr */ +DEFINE_MOP(MOP_wmovrr, {mopdReg32ID,mopdReg32IS},ISMOVE,kLtAlu,"mov","0,1",1) +/* MOP_xmovri32 */ +DEFINE_MOP(MOP_xmovri32, {mopdReg32ID,mopdImm32},ISMOVE,kLtAlu,"mov","0,1",1) +/* MOP_xmovri64 */ +DEFINE_MOP(MOP_xmovri64, {mopdReg64ID,mopdImm64},ISMOVE,kLtAlu,"mov","0,1",1) + +/* MOP_xvmovsr */ +DEFINE_MOP(MOP_xvmovsr, {mopdReg32FD,mopdReg32IS},ISMOVE,kLtR2f,"fmov","0,1",1) +/* MOP_xvmovdr */ +DEFINE_MOP(MOP_xvmovdr, {mopdReg64FD,mopdReg64IS},ISMOVE,kLtR2f,"fmov","0,1",1) +/* MOP_xvmovrs */ +DEFINE_MOP(MOP_xvmovrs, {mopdReg32ID,mopdReg32FS},ISMOVE,kLtF2r,"fmov","0,1",1) +/* MOP_xvmovrd */ +DEFINE_MOP(MOP_xvmovrd, {mopdReg64ID,mopdReg64FS},ISMOVE,kLtF2r,"fmov","0,1",1) +/* MOP_xvmovs */ +DEFINE_MOP(MOP_xvmovs, {mopdReg32FD,mopdReg32FS},ISMOVE,kLtFpalu,"fmov","0,1",1) +/* MOP_xvmovd */ +DEFINE_MOP(MOP_xvmovd, {mopdReg64FD,mopdReg64FS},ISMOVE,kLtFpalu,"fmov","0,1",1) + +/* Vector SIMD mov */ +/* MOP_xmovrv */ +DEFINE_MOP(MOP_xvmovrv, {mopdReg32ID,mopdReg32FS},ISMOVE,kLtF2r,"mov","0,1",1) + +/* MOP_xadrp */ +DEFINE_MOP(MOP_xadrp, {mopdReg64ID,mopdLiteral},ISLOADADDR,kLtShift,"adrp","0,1",1) +/* MOP_xadr */ +DEFINE_MOP(MOP_xadri64, {mopdReg64ID,mopdImm64},ISLOADADDR,kLtShift,"adr","0,1",1) +/* MOP_xadrpl12 */ +DEFINE_MOP(MOP_xadrpl12, {mopdReg64ID,mopdReg64IS,mopdLiteralL12},0,kLtAlu,"add","0,1,2",1) + +/* MOP_xaddrrr AARCH64 Arithmetic: add */ +DEFINE_MOP(MOP_xaddrrr, {mopdReg64ID,mopdReg64IS,mopdReg64IS},0,kLtAlu,"add","0,1,2",1) +/* MOP_xaddrrrs */ +DEFINE_MOP(MOP_xaddrrrs, {mopdReg64ID,mopdReg64IS,mopdReg64IS,mopdBitShift64},0,kLtAluShift,"add","0,1,2,3",1) + +/* MOP_xxwaddrrre */ +DEFINE_MOP(MOP_xxwaddrrre, {mopdReg64ID,mopdReg64IS,mopdReg32IS,mopdExtendShift64},0,kLtAluShift,"add","0,1,2,3",1) +/* MOP_xaddrri24 */ + +DEFINE_MOP(MOP_xaddrri24, {mopdReg64ID,mopdReg64IS,mopdImm12,mopdLSL12},0,kLtShift,"add","0,1,2,3",1) +/* MOP_xaddrri12 */ +DEFINE_MOP(MOP_xaddrri12, {mopdReg64ID,mopdReg64IS,mopdImm12},0,kLtAlu,"add","0,1,2",1) +/* MOP_waddrrr */ +DEFINE_MOP(MOP_waddrrr, {mopdReg32ID,mopdReg32IS,mopdReg32IS},0,kLtAlu,"add","0,1,2",1) +/* MOP_waddrrrs */ +DEFINE_MOP(MOP_waddrrrs, {mopdReg32ID,mopdReg32IS,mopdReg32IS,mopdBitShift32},0,kLtAluShift,"add","0,1,2,3",1) +/* MOP_waddrri24 */ +DEFINE_MOP(MOP_waddrri24, {mopdReg32ID,mopdReg32IS,mopdImm12,mopdLSL12},0,kLtAluShift,"add","0,1,2,3",1) +/* MOP_waddrri12 */ +DEFINE_MOP(MOP_waddrri12, {mopdReg32ID,mopdReg32IS,mopdImm12},0,kLtAlu,"add","0,1,2",1) +/* MOP_dadd */ +DEFINE_MOP(MOP_dadd, {mopdReg64FD,mopdReg64FS,mopdReg64FS},0,kLtFpalu,"fadd","0,1,2",1) +/* MOP_sadd */ +DEFINE_MOP(MOP_sadd, {mopdReg32FD,mopdReg32FS,mopdReg32FS},0,kLtFpalu,"fadd","0,1,2",1) + +/* MOP_xsubrrr AARCH64 Arithmetic: sub */ +DEFINE_MOP(MOP_xsubrrr, {mopdReg64ID,mopdReg64IS,mopdReg64IS},0,kLtAlu,"sub","0,1,2",1) +/* MOP_xsubrrrs */ +DEFINE_MOP(MOP_xsubrrrs, {mopdReg64ID,mopdReg64IS,mopdReg64IS,mopdBitShift64},0,kLtAluShift,"sub","0,1,2,3",1) +/* MOP_xsubrri24 */ +DEFINE_MOP(MOP_xsubrri24, {mopdReg64ID,mopdReg64IS,mopdImm12,mopdLSL12},0,kLtAluShift,"sub","0,1,2,3",1) +/* MOP_xsubrri12 */ +DEFINE_MOP(MOP_xsubrri12, {mopdReg64ID,mopdReg64IS,mopdImm12},0,kLtAlu,"sub","0,1,2",1) +/* MOP_wsubrrr */ +DEFINE_MOP(MOP_wsubrrr, {mopdReg32ID,mopdReg32IS,mopdReg32IS},0,kLtAlu,"sub","0,1,2",1) +/* 
MOP_wsubrrrs */ +DEFINE_MOP(MOP_wsubrrrs, {mopdReg32ID,mopdReg32IS,mopdReg32IS,mopdBitShift32},0,kLtAluShift,"sub","0,1,2,3",1) +/* MOP_wsubrri24 */ +DEFINE_MOP(MOP_wsubrri24, {mopdReg32ID,mopdReg32IS,mopdImm12,mopdLSL12},0,kLtAluShift,"sub","0,1,2,3",1) +/* MOP_wsubrri12 */ +DEFINE_MOP(MOP_wsubrri12, {mopdReg32ID,mopdReg32IS,mopdImm12},0,kLtAlu,"sub","0,1,2",1) +/* MOP_dsub */ +DEFINE_MOP(MOP_dsub, {mopdReg64FD,mopdReg64FS,mopdReg64FS},0,kLtFpalu,"fsub","0,1,2",1) +/* MOP_ssub */ +DEFINE_MOP(MOP_ssub, {mopdReg32FD,mopdReg32FS,mopdReg32FS},0,kLtFpalu,"fsub","0,1,2",1) + +/* AARCH64 Arithmetic: multiply */ +/* MOP_Tbxmulrrr */ +DEFINE_MOP(MOP_xmulrrr, {mopdReg64ID,mopdReg64IS,mopdReg64IS},0,kLtMul,"mul","0,1,2",1) +/* MOP_wmulrrr */ +DEFINE_MOP(MOP_wmulrrr, {mopdReg32ID,mopdReg32IS,mopdReg32IS},0,kLtMul,"mul","0,1,2",1) +/* MOP_Tbxvmuls */ +DEFINE_MOP(MOP_xvmuls, {mopdReg32FD,mopdReg32FS,mopdReg32FS},0,kLtFpmul,"fmul","0,1,2",1) +/* MOP_Tbxvmuld */ +DEFINE_MOP(MOP_xvmuld, {mopdReg64FD,mopdReg64FS,mopdReg64FS},0,kLtFpmul,"fmul","0,1,2",1) +/*MOP_xsmullrrr */ +DEFINE_MOP(MOP_xsmullrrr, {mopdReg64ID,mopdReg32IS,mopdReg32IS},0,kLtMul,"smull","0,1,2",1) + +/* AARCH64 Conversions */ +/* MOP_xsxtb32 */ +DEFINE_MOP(MOP_xsxtb32, {mopdReg32ID,mopdReg32IS},ISCONVERSION,kLtAluShift,"sxtb","0,1",1) +/* MOP_xsxtb64 */ +DEFINE_MOP(MOP_xsxtb64, {mopdReg64ID,mopdReg32IS},ISCONVERSION,kLtAluShift,"sxtb","0,1",1) +/* MOP_xsxth32 */ +DEFINE_MOP(MOP_xsxth32, {mopdReg32ID,mopdReg32IS},ISCONVERSION,kLtAluShift,"sxth","0,1",1) +/* MOP_xsxth64 */ +DEFINE_MOP(MOP_xsxth64, {mopdReg64ID,mopdReg32IS},ISCONVERSION,kLtAluShift,"sxth","0,1",1) +/* MOP_xsxtw64 */ +DEFINE_MOP(MOP_xsxtw64, {mopdReg64ID,mopdReg32IS},ISCONVERSION,kLtAluShift,"sxtw","0,1",1) + +/* MOP_xuxtb32 */ +DEFINE_MOP(MOP_xuxtb32, {mopdReg32ID,mopdReg32IS},ISCONVERSION,kLtAluShift,"uxtb","0,1",1) +/* MOP_xuxth32 */ +DEFINE_MOP(MOP_xuxth32, {mopdReg32ID,mopdReg32IS},ISCONVERSION,kLtAluShift,"uxth","0,1",1) +/* MOP_xuxtw64 Same as mov w0,w0 */ +DEFINE_MOP(MOP_xuxtw64, {mopdReg64ID,mopdReg32IS},ISCONVERSION,kLtAluShift,"uxtw","0,1",1) + +/* MOP_xvcvtfd */ +DEFINE_MOP(MOP_xvcvtfd, {mopdReg32FD,mopdReg64FS},ISCONVERSION,kLtFpalu,"fcvt","0,1",1) +/* MOP_xvcvtdf */ +DEFINE_MOP(MOP_xvcvtdf, {mopdReg64FD,mopdReg32FS},ISCONVERSION,kLtFpalu,"fcvt","0,1",1) + +/* MOP_vcvtrf fcvtzs w,s */ +DEFINE_MOP(MOP_vcvtrf, {mopdReg32ID,mopdReg32FS},ISCONVERSION,kLtF2rCvt,"fcvtzs","0,1",1) +/* MOP_xvcvtrf fcvtzs x,s */ +DEFINE_MOP(MOP_xvcvtrf, {mopdReg64ID,mopdReg32FS},ISCONVERSION,kLtF2rCvt,"fcvtzs","0,1",1) +/* MOP_vcvturf fcvtzu w,s */ +DEFINE_MOP(MOP_vcvturf, {mopdReg32ID,mopdReg32FS},ISCONVERSION,kLtF2rCvt,"fcvtzu","0,1",1) +/* MOP_xvcvturf fcvtzu x,s */ +DEFINE_MOP(MOP_xvcvturf, {mopdReg64ID,mopdReg32FS},ISCONVERSION,kLtF2rCvt,"fcvtzu","0,1",1) + +/* MOP_vcvtas fcvtas w,s (for round) */ +DEFINE_MOP(MOP_vcvtas, {mopdReg32ID,mopdReg32FS},ISCONVERSION,kLtF2rCvt,"fcvtas","0,1",1) +/* MOP_xvcvtas fcvtas x,s */ +DEFINE_MOP(MOP_xvcvtas, {mopdReg64ID,mopdReg64FS},ISCONVERSION,kLtF2rCvt,"fcvtas","0,1",1) +/* MOP_vcvtms fcvtms w,s (for floor) */ +DEFINE_MOP(MOP_vcvtms, {mopdReg32ID,mopdReg32FS},ISCONVERSION,kLtF2rCvt,"fcvtms","0,1",1) +/* MOP_xvcvtms fcvtms x,s */ +DEFINE_MOP(MOP_xvcvtms, {mopdReg64ID,mopdReg64FS},ISCONVERSION,kLtF2rCvt,"fcvtms","0,1",1) +/* MOP_vcvtps fcvtps w,s (for ceil) */ +DEFINE_MOP(MOP_vcvtps, {mopdReg32ID,mopdReg32FS},ISCONVERSION,kLtF2rCvt,"fcvtps","0,1",1) +/* MOP_xvcvtps fcvtps x,d */ +DEFINE_MOP(MOP_xvcvtps, 
{mopdReg64ID,mopdReg64FS},ISCONVERSION,kLtF2rCvt,"fcvtps","0,1",1) + +/* MOP_vcvtrd fcvtzs w,d */ +DEFINE_MOP(MOP_vcvtrd, {mopdReg32ID,mopdReg64FS},ISCONVERSION,kLtF2rCvt,"fcvtzs","0,1",1) +/* MOP_xvcvtrd fcvtzs x,d */ +DEFINE_MOP(MOP_xvcvtrd, {mopdReg64ID,mopdReg64FS},ISCONVERSION,kLtF2rCvt,"fcvtzs","0,1",1) +/* MOP_vcvturd fcvtzu w,d */ +DEFINE_MOP(MOP_vcvturd, {mopdReg32ID,mopdReg64FS},ISCONVERSION,kLtF2rCvt,"fcvtzu","0,1",1) +/* MOP_xvcvturd fcvtzu x,d */ +DEFINE_MOP(MOP_xvcvturd, {mopdReg64ID,mopdReg64FS},ISCONVERSION,kLtF2rCvt,"fcvtzu","0,1",1) + +/* MOP_vcvtfr scvtf s,w */ +DEFINE_MOP(MOP_vcvtfr, {mopdReg32FD,mopdReg32IS},ISCONVERSION,kLtR2fCvt,"scvtf","0,1",1) +/* MOP_xvcvtfr scvtf s,x */ +DEFINE_MOP(MOP_xvcvtfr, {mopdReg32FD,mopdReg64IS},ISCONVERSION,kLtR2fCvt,"scvtf","0,1",1) +/* MOP_vcvtufr ucvtf s,w */ +DEFINE_MOP(MOP_vcvtufr, {mopdReg32FD,mopdReg32IS},ISCONVERSION,kLtR2fCvt,"ucvtf","0,1",1) +/* MOP_xvcvtufr ucvtf s,x */ +DEFINE_MOP(MOP_xvcvtufr, {mopdReg32FD,mopdReg64IS},ISCONVERSION,kLtR2fCvt,"ucvtf","0,1",1) + +/* MOP_vcvtdr scvtf d,w */ +DEFINE_MOP(MOP_vcvtdr, {mopdReg64FD,mopdReg32IS},ISCONVERSION,kLtR2fCvt,"scvtf","0,1",1) +/* MOP_xvcvtdr scvtf d,x */ +DEFINE_MOP(MOP_xvcvtdr, {mopdReg64FD,mopdReg64IS},ISCONVERSION,kLtR2fCvt,"scvtf","0,1",1) +/* MOP_vcvtudr ucvtf d,w */ +DEFINE_MOP(MOP_vcvtudr, {mopdReg64FD,mopdReg32IS},ISCONVERSION,kLtR2fCvt,"ucvtf","0,1",1) +/* MOP_xvcvtudr ucvtf d,x */ +DEFINE_MOP(MOP_xvcvtudr, {mopdReg64FD,mopdReg64IS},ISCONVERSION,kLtR2fCvt,"ucvtf","0,1",1) + +/* MOP_xcsel */ +DEFINE_MOP(MOP_wcselrrrc, {mopdReg32ID,mopdReg32IS,mopdReg32IS,mopdCond},ISCONDDEF,kLtAlu,"csel","0,1,2,3",1) +DEFINE_MOP(MOP_xcselrrrc, {mopdReg64ID,mopdReg64IS,mopdReg64IS,mopdCond},ISCONDDEF,kLtAlu,"csel","0,1,2,3",1) + +/* MOP_xcset -- all conditions minus AL & NV */ +DEFINE_MOP(MOP_wcsetrc, {mopdReg32ID,mopdCond},ISCONDSET | ISCONDDEF,kLtAlu,"cset","0,1",1) +DEFINE_MOP(MOP_xcsetrc, {mopdReg64ID,mopdCond},ISCONDSET | ISCONDDEF,kLtAlu,"cset","0,1",1) + +/* MOP_xcsinc */ +DEFINE_MOP(MOP_wcsincrrrc, {mopdReg32ID,mopdReg32IS,mopdReg32IS,mopdCond},ISCONDDEF,kLtAlu,"csinc","0,1,2,3",1) +DEFINE_MOP(MOP_xcsincrrrc, {mopdReg64ID,mopdReg64IS,mopdReg64IS,mopdCond},ISCONDDEF,kLtAlu,"csinc","0,1,2,3",1) + +/* MOP_xcsinv */ +DEFINE_MOP(MOP_wcsinvrrrc, {mopdReg32ID,mopdReg32IS,mopdReg32IS,mopdCond},ISCONDDEF,kLtAlu,"csinv","0,1,2,3",1) +DEFINE_MOP(MOP_xcsinvrrrc, {mopdReg64ID,mopdReg64IS,mopdReg64IS,mopdCond},ISCONDDEF,kLtAlu,"csinv","0,1,2,3",1) + +/* MOP_xandrrr */ +DEFINE_MOP(MOP_xandrrr, {mopdReg64ID,mopdReg64IS,mopdReg64IS},0,kLtAlu,"and","0,1,2",1) +/* MOP_xandrrrs */ +DEFINE_MOP(MOP_xandrrrs, {mopdReg64ID,mopdReg64IS,mopdReg64IS,mopdBitShift64},0,kLtAluShift,"and","0,1,2,3",1) +/* MOP_xandrri13 */ +DEFINE_MOP(MOP_xandrri13, {mopdReg64ID,mopdReg64IS,mopdImm13},0,kLtAlu,"and","0,1,2",1) +/* MOP_wandrrr */ +DEFINE_MOP(MOP_wandrrr, {mopdReg32ID,mopdReg32IS,mopdReg32IS},0,kLtAlu,"and","0,1,2",1) +/* MOP_wandrrrs */ +DEFINE_MOP(MOP_wandrrrs, {mopdReg32ID,mopdReg32IS,mopdReg32IS,mopdBitShift32},0,kLtAluShift,"and","0,1,2,3",1) +/* MOP_wandrri12 */ +DEFINE_MOP(MOP_wandrri12, {mopdReg32ID,mopdReg32IS,mopdImm12},0,kLtAlu,"and","0,1,2",1) + +/* MOP_xiorrrr */ +DEFINE_MOP(MOP_xiorrrr, {mopdReg64ID,mopdReg64IS,mopdReg64IS},0,kLtAlu,"orr","0,1,2",1) +/* MOP_xiorrrrs */ +DEFINE_MOP(MOP_xiorrrrs, {mopdReg64ID,mopdReg64IS,mopdReg64IS,mopdBitShift64},0,kLtAlu,"orr","0,1,2,3",1) +/* MOP_xiorrri13 */ +DEFINE_MOP(MOP_xiorrri13, {mopdReg64ID,mopdReg64IS,mopdImm13},0,kLtAlu,"orr","0,1,2",1) +/* 
MOP_wiorrrr */ +DEFINE_MOP(MOP_wiorrrr, {mopdReg32ID,mopdReg32IS,mopdReg32IS},0,kLtAlu,"orr","0,1,2",1) +/* MOP_wiorrrrs */ +DEFINE_MOP(MOP_wiorrrrs, {mopdReg32ID,mopdReg32IS,mopdReg32IS,mopdBitShift32},0,kLtAlu,"orr","0,1,2,3",1) +/* MOP_wiorrri12 */ +DEFINE_MOP(MOP_wiorrri12, {mopdReg32ID,mopdReg32IS,mopdImm12},0,kLtAlu,"orr","0,1,2",1) + +/* MOP_xiorri13r */ +DEFINE_MOP(MOP_xiorri13r, {mopdReg64ID,mopdImm13,mopdReg64IS},0,kLtAlu,"orr","0,2,1",1) +/* MOP_wiorri12r */ +DEFINE_MOP(MOP_wiorri12r, {mopdReg32ID,mopdImm12,mopdReg32IS},0,kLtAlu,"orr","0,2,1",1) + +/* MOP_xeorrrr */ +DEFINE_MOP(MOP_xeorrrr, {mopdReg64ID,mopdReg64IS,mopdReg64IS},0,kLtAlu,"eor","0,1,2",1) +/* MOP_xeorrrrs */ +DEFINE_MOP(MOP_xeorrrrs, {mopdReg64ID,mopdReg64IS,mopdReg64IS,mopdBitShift64},0,kLtAlu,"eor","0,1,2,3",1) +/* MOP_xeorrri13 */ +DEFINE_MOP(MOP_xeorrri13, {mopdReg64ID,mopdReg64IS,mopdImm13},0,kLtAlu,"eor","0,1,2",1) +/* MOP_weorrrr */ +DEFINE_MOP(MOP_weorrrr, {mopdReg32ID,mopdReg32IS,mopdReg32IS},0,kLtAlu,"eor","0,1,2",1) +/* MOP_weorrrrs */ +DEFINE_MOP(MOP_weorrrrs, {mopdReg32ID,mopdReg32IS,mopdReg32IS,mopdBitShift32},0,kLtAlu,"eor","0,1,2,3",1) +/* MOP_weorrri12 */ +DEFINE_MOP(MOP_weorrri12, {mopdReg32ID,mopdReg32IS,mopdImm12},0,kLtAlu,"eor","0,1,2",1) + +/* MOP_weorrri8m */ +DEFINE_MOP(MOP_weorrri8m, {mopdReg32ID,mopdReg32IS,mopdImm8},0,kLtAlu,"eor","0,1,2",1) + +/* MOP_xnotrr */ +DEFINE_MOP(MOP_xnotrr, {mopdReg64ID,mopdReg64IS},0,kLtAlu,"mvn","0,1",1) +/* MOP_wnotrr */ +DEFINE_MOP(MOP_wnotrr, {mopdReg32ID,mopdReg32IS},0,kLtAlu,"mvn","0,1",1) + +/* MOP_wfmaxrrr */ +DEFINE_MOP(MOP_wfmaxrrr, {mopdReg32FD,mopdReg32FS,mopdReg32FS},0,kLtFpalu,"fmax","0,1,2",1) +/* MOP_xfmaxrrr */ +DEFINE_MOP(MOP_xfmaxrrr, {mopdReg64FD,mopdReg64FS,mopdReg64FS},0,kLtFpalu,"fmax","0,1,2",1) +/* MOP_wfminrrr */ +DEFINE_MOP(MOP_wfminrrr, {mopdReg32FD,mopdReg32FS,mopdReg32FS},0,kLtFpalu,"fmin","0,1,2",1) +/* MOP_xfminrrr */ +DEFINE_MOP(MOP_xfminrrr, {mopdReg64FD,mopdReg64FS,mopdReg64FS},0,kLtFpalu,"fmin","0,1,2",1) + +/* MOP_wsdivrrr */ +DEFINE_MOP(MOP_wsdivrrr, {mopdReg32ID,mopdReg32IS,mopdReg32IS},CANTHROW,kLtDiv,"sdiv","0,1,2",1) +/* MOP_xsdivrrr */ +DEFINE_MOP(MOP_xsdivrrr, {mopdReg64ID,mopdReg64IS,mopdReg64IS},CANTHROW,kLtDiv,"sdiv","0,1,2",1) +/* MOP_wudivrrr */ +DEFINE_MOP(MOP_wudivrrr, {mopdReg32ID,mopdReg32IS,mopdReg32IS},CANTHROW,kLtDiv,"udiv","0,1,2",1) +/* MOP_xudivrrr */ +DEFINE_MOP(MOP_xudivrrr, {mopdReg64ID,mopdReg64IS,mopdReg64IS},CANTHROW,kLtDiv,"udiv","0,1,2",1) + +/* MOP_wmsubrrrr */ +DEFINE_MOP(MOP_wmsubrrrr, {mopdReg32ID,mopdReg32IS,mopdReg32IS,mopdReg32IS},0,kLtMul,"msub","0,1,2,3",1) +/* MOP_xmsubrrrr */ +DEFINE_MOP(MOP_xmsubrrrr, {mopdReg64ID,mopdReg64IS,mopdReg64IS,mopdReg64IS},0,kLtMul,"msub","0,1,2,3",1) + +/* MPO_wubfxrri5i5 */ +DEFINE_MOP(MOP_wubfxrri5i5, {mopdReg32ID,mopdReg32IS,mopdImm5,mopdImm5},0,kLtAluShift,"ubfx","0,1,2,3",1) +/* MPO_xubfxrri6i6 */ +DEFINE_MOP(MOP_xubfxrri6i6, {mopdReg64ID,mopdReg64IS,mopdImm6,mopdImm6},0,kLtAluShift,"ubfx","0,1,2,3",1) + +/* MPO_wsbfxrri5i5 -- Signed Bitfield Extract */ +DEFINE_MOP(MOP_wsbfxrri5i5, {mopdReg32ID,mopdReg32IS,mopdImm5,mopdImm5},0,kLtAluShift,"sbfx","0,1,2,3",1) +/* MPO_xsbfxrri6i6 */ +DEFINE_MOP(MOP_xsbfxrri6i6, {mopdReg64ID,mopdReg64IS,mopdImm6,mopdImm6},0,kLtAluShift,"sbfx","0,1,2,3",1) + +/* MPO_wubfizrri5i5 -- Unsigned Bitfield Insert in Zero */ +DEFINE_MOP(MOP_wubfizrri5i5, {mopdReg32ID,mopdReg32IS,mopdImm5,mopdImm5},0,kLtAluShift,"ubfiz","0,1,2,3",1) +/* MPO_xubfizrri6i6 */ +DEFINE_MOP(MOP_xubfizrri6i6, 
{mopdReg64ID,mopdReg64IS,mopdImm6,mopdImm6},0,kLtAluShift,"ubfiz","0,1,2,3",1) + +/* MPO_wbfirri5i5 -- Bitfield Insert */ +DEFINE_MOP(MPO_wbfirri5i5, {mopdReg32ID,mopdReg32IS,mopdImm5,mopdImm5},0,kLtAluShift,"bfi","0,1,2,3",1) +/* MPO_xbfirri6i6 */ +DEFINE_MOP(MPO_xbfirri6i6, {mopdReg64ID,mopdReg64IS,mopdImm6,mopdImm6},0,kLtAluShift,"bfi","0,1,2,3",1) + + +/* MOP_xlslrri6,--- Logical Shift Left */ +DEFINE_MOP(MOP_xlslrri6, {mopdReg64ID,mopdReg64IS,mopdImm6},0,kLtAluShift,"lsl","0,1,2",1) +/* MOP_wlslrri5 */ +DEFINE_MOP(MOP_wlslrri5, {mopdReg32ID,mopdReg32IS,mopdImm8},0,kLtAluShift,"lsl","0,1,2",1) +/* MOP_xasrrri6, */ +DEFINE_MOP(MOP_xasrrri6, {mopdReg64ID,mopdReg64IS,mopdImm6},0,kLtAluShift,"asr","0,1,2",1) +/* MOP_wasrrri5 */ +DEFINE_MOP(MOP_wasrrri5, {mopdReg32ID,mopdReg32IS,mopdImm8},0,kLtAluShift,"asr","0,1,2",1) +/* MOP_xlsrrri6, */ +DEFINE_MOP(MOP_xlsrrri6, {mopdReg64ID,mopdReg64IS,mopdImm6},0,kLtAluShift,"lsr","0,1,2",1) +/* MOP_wlsrrri5 */ +DEFINE_MOP(MOP_wlsrrri5, {mopdReg32ID,mopdReg32IS,mopdImm8},0,kLtAluShift,"lsr","0,1,2",1) +/* MOP_xlslrrr, */ +DEFINE_MOP(MOP_xlslrrr, {mopdReg64ID,mopdReg64IS,mopdReg64IS},0,kLtAluShiftReg,"lsl","0,1,2",1) +/* MOP_wlslrrr */ +DEFINE_MOP(MOP_wlslrrr, {mopdReg32ID,mopdReg32IS,mopdReg32IS},0,kLtAluShiftReg,"lsl","0,1,2",1) +/* MOP_xasrrrr, */ +DEFINE_MOP(MOP_xasrrrr, {mopdReg64ID,mopdReg64IS,mopdReg64IS},0,kLtAluShiftReg,"asr","0,1,2",1) +/* MOP_wasrrrr */ +DEFINE_MOP(MOP_wasrrrr, {mopdReg32ID,mopdReg32IS,mopdReg32IS},0,kLtAluShiftReg,"asr","0,1,2",1) +/* MOP_xlsrrrr, */ +DEFINE_MOP(MOP_xlsrrrr, {mopdReg64ID,mopdReg64IS,mopdReg64IS},0,kLtAluShiftReg,"lsr","0,1,2",1) +/* MOP_wlsrrrr */ +DEFINE_MOP(MOP_wlsrrrr, {mopdReg32ID,mopdReg32IS,mopdReg32IS},0,kLtAluShiftReg,"lsr","0,1,2",1) + +/* MOP_wsfmovri imm8->s */ +DEFINE_MOP(MOP_wsfmovri, {mopdReg32FD,mopdImm8},ISMOVE,kLtFconst,"fmov","0,1",1) +/* MOP_xdfmovri imm8->d */ +DEFINE_MOP(MOP_xdfmovri, {mopdReg64FD,mopdImm8},ISMOVE,kLtFconst,"fmov","0,1",1) + +/* MOP_xcsneg -- Conditional Select Negation */ +DEFINE_MOP(MOP_wcsnegrrrc, {mopdReg32ID,mopdReg32IS,mopdReg32IS,mopdCond},ISCONDDEF,kLtAlu,"csneg","0,1,2,3",1) +DEFINE_MOP(MOP_xcsnegrrrc, {mopdReg64ID,mopdReg64IS,mopdReg64IS,mopdCond},ISCONDDEF,kLtAlu,"csneg","0,1,2,3",1) + +/* MOP_sabsrr */ +DEFINE_MOP(MOP_sabsrr, {mopdReg32FD,mopdReg32FS},0,kLtFpalu,"fabs","0,1",1) +/* MOP_dabsrr */ +DEFINE_MOP(MOP_dabsrr, {mopdReg64FD,mopdReg64FS},0,kLtFpalu,"fabs","0,1",1) + +/* neg i32 */ +DEFINE_MOP(MOP_winegrr, {mopdReg32ID,mopdReg32IS},0,kLtAlu,"neg","0,1",1) +/* neg i64 */ +DEFINE_MOP(MOP_xinegrr, {mopdReg64ID,mopdReg64IS},0,kLtAlu,"neg","0,1",1) +/* neg f32 */ +DEFINE_MOP(MOP_wfnegrr, {mopdReg32FD,mopdReg32FS},0,kLtFpalu,"fneg","0,1",1) +/* neg f64 */ +DEFINE_MOP(MOP_xfnegrr, {mopdReg64FD,mopdReg64FS},0,kLtFpalu,"fneg","0,1",1) + +/* MOP_sdivrrr */ +DEFINE_MOP(MOP_sdivrrr, {mopdReg32FD,mopdReg32FS,mopdReg32FS},CANTHROW,kLtAdvsimdDivS,"fdiv","0,1,2",1) +/* MOP_ddivrrr */ +DEFINE_MOP(MOP_ddivrrr, {mopdReg64FD,mopdReg64FS,mopdReg64FS},CANTHROW,kLtAdvsimdDivD,"fdiv","0,1,2",1) + +/* MOP_hcselrrrc --- Floating-point Conditional Select */ +DEFINE_MOP(MOP_hcselrrrc, {mopdReg16FD,mopdReg16FS,mopdReg16FS,mopdCond},ISCONDDEF,kLtFpalu,"fcsel","0,1,2,3",1) +/* MOP_scselrrrc */ +DEFINE_MOP(MOP_scselrrrc, {mopdReg32FD,mopdReg32FS,mopdReg32FS,mopdCond},ISCONDDEF,kLtFpalu,"fcsel","0,1,2,3",1) +/* MOP_dcselrrrc */ +DEFINE_MOP(MOP_dcselrrrc, {mopdReg64FD,mopdReg64FS,mopdReg64FS,mopdCond},ISCONDDEF,kLtFpalu,"fcsel","0,1,2,3",1) + +/* MOP_wldli -- load 32-bit literal */ 
+DEFINE_MOP(MOP_wldli, {mopdReg32ID,mopdLabel},ISLOAD|CANTHROW,kLtLoad1,"ldr","0,1",1) +/* MOP_xldli -- load 64-bit literal */ +DEFINE_MOP(MOP_xldli, {mopdReg64ID,mopdLabel},ISLOAD|CANTHROW,kLtLoad2,"ldr","0,1",1) +/* MOP_sldli -- load 32-bit literal */ +DEFINE_MOP(MOP_sldli, {mopdReg32FD,mopdLabel},ISLOAD|CANTHROW,kLtLoad1,"ldr","0,1",1) +/* MOP_dldli -- load 64-bit literal */ +DEFINE_MOP(MOP_dldli, {mopdReg64FD,mopdLabel},ISLOAD|CANTHROW,kLtLoad2,"ldr","0,1",1) + +/* AArch64 branches/calls */ +/* MOP_xbl -- branch with link (call); this is a special definition */ +DEFINE_MOP(MOP_xbl, {mopdFuncName,mopdLISTS},ISCALL|CANTHROW,kLtBranch,"bl","0",1) +/* MOP_xblr -- branch with link (call) to register; this is a special definition */ +DEFINE_MOP(MOP_xblr, {mopdReg64IS,mopdLISTS},ISCALL|CANTHROW,kLtBranch,"blr","0",1) + +/* AARCH64 LOADS */ +/* MOP_wldrsb --- Load Register Signed Byte */ +DEFINE_MOP(MOP_wldrsb, {mopdReg32ID,mopdMem8S},ISLOAD|CANTHROW,kLtLoad1,"ldrsb","0,1",1) +/* MOP_wldrb */ +DEFINE_MOP(MOP_wldrb, {mopdReg32ID,mopdMem8S},ISLOAD|CANTHROW,kLtLoad1,"ldrb","0,1",1) +/* MOP_wldrsh --- Load Register Signed Halfword */ +DEFINE_MOP(MOP_wldrsh, {mopdReg32ID,mopdMem16S},ISLOAD|CANTHROW,kLtLoad1,"ldrsh","0,1",1) +/* MOP_wldrh */ +DEFINE_MOP(MOP_wldrh, {mopdReg32ID, mopdMem16S},ISLOAD|CANTHROW,kLtLoad1,"ldrh","0,1",1) +/* MOP_wldr */ +DEFINE_MOP(MOP_wldr, {mopdReg32ID,mopdMem32S},ISLOAD|CANTHROW,kLtLoad1,"ldr","0,1",1) +/* MOP_xldr */ +DEFINE_MOP(MOP_xldr, {mopdReg64ID,mopdMem64S},ISLOAD|CANTHROW,kLtLoad2,"ldr","0,1",1) +/* MOP_bldr */ +DEFINE_MOP(MOP_bldr, {mopdReg8FD,mopdMem8S},ISLOAD|CANTHROW,kLtFLoad64,"ldr","0,1",1) +/* MOP_hldr */ +DEFINE_MOP(MOP_hldr, {mopdReg16FD,mopdMem16S},ISLOAD|CANTHROW,kLtFLoad64,"ldr","0,1",1) +/* MOP_sldr */ +DEFINE_MOP(MOP_sldr, {mopdReg32FD,mopdMem32S},ISLOAD|CANTHROW,kLtFLoadMany,"ldr","0,1",1) +/* MOP_dldr */ +DEFINE_MOP(MOP_dldr, {mopdReg64FD,mopdMem64S},ISLOAD|CANTHROW,kLtFLoadMany,"ldr","0,1",1) + +/* AArch64 LDP/LDPSW */ +/* MOP_wldp */ +DEFINE_MOP(MOP_wldp, {mopdReg32ID,mopdReg32ID,mopdMem32S},ISLOAD|ISLOADPAIR|CANTHROW,kLtLoad2,"ldp","0,1,2",1) +/* MOP_xldp */ +DEFINE_MOP(MOP_xldp, {mopdReg64ID,mopdReg64ID,mopdMem64S},ISLOAD|ISLOADPAIR|CANTHROW,kLtLoad3plus,"ldp","0,1,2",1) +/* MOP_xldpsw */ +DEFINE_MOP(MOP_xldpsw, {mopdReg64ID,mopdReg64ID,mopdMem32S},ISLOAD|ISLOADPAIR|CANTHROW,kLtLoad2,"ldpsw","0,1,2",1) +/* MOP_sldp */ +DEFINE_MOP(MOP_sldp, {mopdReg32FD,mopdReg32FD,mopdMem32S},ISLOAD|ISLOADPAIR|CANTHROW,kLtFLoad64,"ldp","0,1,2",1) +/* MOP_dldp */ +DEFINE_MOP(MOP_dldp, {mopdReg64FD,mopdReg64FD,mopdMem64S},ISLOAD|ISLOADPAIR|CANTHROW,kLtFLoadMany,"ldp","0,1,2",1) + +/* AARCH64 Load with Acquire semantics */ +/* MOP_wldarb */ +DEFINE_MOP(MOP_wldarb, {mopdReg32ID,mopdMem8S},ISLOAD|HASACQUIRE|CANTHROW,kLtLoad1,"ldarb","0,1",1) +/* MOP_wldarh */ +DEFINE_MOP(MOP_wldarh, {mopdReg32ID, mopdMem16S},ISLOAD|HASACQUIRE|CANTHROW,kLtLoad1,"ldarh","0,1",1) +/* MOP_wldar */ +DEFINE_MOP(MOP_wldar, {mopdReg32ID,mopdMem32S},ISLOAD|HASACQUIRE|CANTHROW,kLtLoad1,"ldar","0,1",1) +/* MOP_xldar */ +DEFINE_MOP(MOP_xldar, {mopdReg64ID,mopdMem64S},ISLOAD|HASACQUIRE|CANTHROW,kLtLoad1,"ldar","0,1",1) + +/* MOP_wmovkri16 */ +DEFINE_MOP(MOP_wmovkri16, {mopdReg32IDS,mopdImm16,mopdLSL4},ISMOVE|ISPARTDEF,kLtShift,"movk","0,1,2",1) +/* MOP_xmovkri16 */ +DEFINE_MOP(MOP_xmovkri16, {mopdReg64IDS,mopdImm16,mopdLSL6},ISMOVE|ISPARTDEF,kLtShift,"movk","0,1,2",1) + +/* MOP_wmovzri16 */ +DEFINE_MOP(MOP_wmovzri16, {mopdReg32ID,mopdImm16,mopdLSL4},ISMOVE|ISPARTDEF,kLtShift,"movz","0,1,2",1) 
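+/*
+ * Worked example for the wide-move operations (movz/movk/movn): a 64-bit constant
+ * such as 0x123456789abcdef0 is materialized one 16-bit halfword at a time, e.g.
+ *   movz x0, #0xdef0
+ *   movk x0, #0x9abc, lsl #16
+ *   movk x0, #0x5678, lsl #32
+ *   movk x0, #0x1234, lsl #48
+ * BetterUseMOVZ (declared in riscv64_immediate.h) is meant to decide whether a
+ * movz- or a movn-based sequence is cheaper, i.e. needs fewer movk fix-ups.
+ */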
+/* MOP_xmovzri16 */ +DEFINE_MOP(MOP_xmovzri16, {mopdReg64ID,mopdImm16,mopdLSL6},ISMOVE|ISPARTDEF,kLtShift,"movz","0,1,2",1) + +/* MOP_wmovnri16 */ +DEFINE_MOP(MOP_wmovnri16, {mopdReg32ID,mopdImm16,mopdLSL4},ISMOVE|ISPARTDEF,kLtShift,"movn","0,1,2",1) +/* MOP_xmovnri16 */ +DEFINE_MOP(MOP_xmovnri16, {mopdReg64ID,mopdImm16,mopdLSL6},ISMOVE|ISPARTDEF,kLtShift,"movn","0,1,2",1) + +/* AARCH64 Load exclusive with/without acquire semantics */ +DEFINE_MOP(MOP_wldxrb, {mopdReg32ID,mopdMem8S, },ISLOAD|ISATOMIC|CANTHROW,kLtLoad1,"ldxrb","0,1",1) +DEFINE_MOP(MOP_wldxrh, {mopdReg32ID,mopdMem16S},ISLOAD|ISATOMIC|CANTHROW,kLtLoad1,"ldxrh","0,1",1) +DEFINE_MOP(MOP_wldxr, {mopdReg32ID,mopdMem32S},ISLOAD|ISATOMIC|CANTHROW,kLtLoad1,"ldxr","0,1",1) +DEFINE_MOP(MOP_xldxr, {mopdReg64ID,mopdMem64S},ISLOAD|ISATOMIC|CANTHROW,kLtLoad1,"ldxr","0,1",1) + +DEFINE_MOP(MOP_wldaxrb,{mopdReg32ID,mopdMem8S, },ISLOAD|ISATOMIC|HASACQUIRE|CANTHROW,kLtLoad1,"ldaxrb","0,1",1) +DEFINE_MOP(MOP_wldaxrh,{mopdReg32ID,mopdMem16S},ISLOAD|ISATOMIC|HASACQUIRE|CANTHROW,kLtLoad1,"ldaxrh","0,1",1) +DEFINE_MOP(MOP_wldaxr, {mopdReg32ID,mopdMem32S},ISLOAD|ISATOMIC|HASACQUIRE|CANTHROW,kLtLoad1,"ldaxr","0,1",1) +DEFINE_MOP(MOP_xldaxr, {mopdReg64ID,mopdMem64S},ISLOAD|ISATOMIC|HASACQUIRE|CANTHROW,kLtLoad1,"ldaxr","0,1",1) + +DEFINE_MOP(MOP_wldaxp, {mopdReg32ID,mopdReg32ID,mopdMem32S},ISLOAD|ISLOADPAIR|ISATOMIC|HASACQUIRE|CANTHROW,kLtLoad1,"ldaxp","0,1,2",1) +DEFINE_MOP(MOP_xldaxp, {mopdReg64ID,mopdReg64ID,mopdMem64S},ISLOAD|ISLOADPAIR|ISATOMIC|HASACQUIRE|CANTHROW,kLtLoad1,"ldaxp","0,1,2",1) + +/* MOP_vsqrts */ +DEFINE_MOP(MOP_vsqrts, {mopdReg32FD,mopdReg32FS},CANTHROW,kLtAdvsimdDivS,"fsqrt","0,1",1) +/* MOP_vsqrtd */ +DEFINE_MOP(MOP_vsqrtd, {mopdReg64FD,mopdReg64FS},CANTHROW,kLtAdvsimdDivD,"fsqrt","0,1",1) + + +/* # Non Definitions */ +/* # As far as register allocation is concerned, the instructions below are non-definitions. 
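+ # e.g. the b.<cond> forms below read only the condition flags and a branch target,
+ # and the cmp/fcmp forms write nothing but the condition flags.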
*/ + +/* MOP_beq */ +DEFINE_MOP(MOP_beq, {mopdRegCCS,mopdLabel},ISCONDBRANCH,kLtBranch,"beq","1",1) +/* MOP_bne */ +DEFINE_MOP(MOP_bne, {mopdRegCCS,mopdLabel},ISCONDBRANCH,kLtBranch,"bne","1",1) +/* MOP_blt */ +DEFINE_MOP(MOP_blt, {mopdRegCCS,mopdLabel},ISCONDBRANCH,kLtBranch,"blt","1",1) +/* MOP_ble */ +DEFINE_MOP(MOP_ble, {mopdRegCCS,mopdLabel},ISCONDBRANCH,kLtBranch,"ble","1",1) +/* MOP_bgt */ +DEFINE_MOP(MOP_bgt, {mopdRegCCS,mopdLabel},ISCONDBRANCH,kLtBranch,"bgt","1",1) +/* MOP_bge */ +DEFINE_MOP(MOP_bge, {mopdRegCCS,mopdLabel},ISCONDBRANCH,kLtBranch,"bge","1",1) +/* MOP_blo equal to MOP_blt for unsigned comparison */ +DEFINE_MOP(MOP_blo, {mopdRegCCS,mopdLabel},ISCONDBRANCH,kLtBranch,"blo","1",1) +/* MOP_bls equal to MOP_bls for unsigned comparison */ +DEFINE_MOP(MOP_bls, {mopdRegCCS,mopdLabel},ISCONDBRANCH,kLtBranch,"bls","1",1) +/* MOP_bhs equal to MOP_bge for unsigned comparison */ +DEFINE_MOP(MOP_bhs, {mopdRegCCS,mopdLabel},ISCONDBRANCH,kLtBranch,"bhs","1",1) +/* MOP_bhi equal to MOP_bgt for float comparison */ +DEFINE_MOP(MOP_bhi, {mopdRegCCS,mopdLabel},ISCONDBRANCH,kLtBranch,"bhi","1",1) +/* MOP_bpl equal to MOP_bge for float comparison */ +DEFINE_MOP(MOP_bpl, {mopdRegCCS,mopdLabel},ISCONDBRANCH,kLtBranch,"bpl","1",1) +DEFINE_MOP(MOP_bmi, {mopdRegCCS,mopdLabel},ISCONDBRANCH,kLtBranch,"bmi","1",1) +DEFINE_MOP(MOP_bvc, {mopdRegCCS,mopdLabel},ISCONDBRANCH,kLtBranch,"bvc","1",1) +DEFINE_MOP(MOP_bvs, {mopdRegCCS,mopdLabel},ISCONDBRANCH,kLtBranch,"bvs","1",1) + +/* MOP_xret AARCH64 Specific */ +DEFINE_MOP(MOP_xret, {},CANTHROW,kLtBranch,"ret","",1) + +/* AARCH64 Floating-Point COMPARES signaling versions */ +/* MOP_hcmperi -- AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_hcmperi, {mopdRegCCD, mopdReg16FS,mopdFPZeroImm8},0,kLtFpalu,"fcmpe","1,2",1) +/* MOP_hcmperr -- register, shifted register, AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_hcmperr, {mopdRegCCD, mopdReg16FS,mopdReg16FS},0,kLtFpalu,"fcmpe","1,2",1) + +/* MOP_scmperi -- AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_scmperi, {mopdRegCCD, mopdReg32FS,mopdFPZeroImm8},0,kLtFpalu,"fcmpe","1,2",1) +/* MOP_scmperr */ +DEFINE_MOP(MOP_scmperr, {mopdRegCCD, mopdReg32FS,mopdReg32FS},0,kLtFpalu,"fcmpe","1,2",1) + +/* MOP_dcmperi -- AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_dcmperi, {mopdRegCCD, mopdReg64FS,mopdFPZeroImm8},0,kLtFpalu,"fcmpe","1,2",1) +/* MOP_dcmperr */ +DEFINE_MOP(MOP_dcmperr, {mopdRegCCD, mopdReg64FS,mopdReg64FS},0,kLtFpalu,"fcmpe","1,2",1) + +/* AARCH64 Floating-Point COMPARES non-signaling (quiet) versions */ +/* MOP_hcmpqri -- AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_hcmpqri, {mopdRegCCD, mopdReg16FS,mopdFPZeroImm8},0,kLtFpalu,"fcmp","1,2",1) +/* MOP_hcmpqrr -- register, shifted register, AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_hcmpqrr, {mopdRegCCD, mopdReg16FS,mopdReg16FS},0,kLtFpalu,"fcmp","1,2",1) + +/* MOP_scmpqri -- AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_scmpqri, {mopdRegCCD, mopdReg32FS,mopdFPZeroImm8},0,kLtFpalu,"fcmp","1,2",1) +/* MOP_scmpqrr */ +DEFINE_MOP(MOP_scmpqrr, {mopdRegCCD, mopdReg32FS,mopdReg32FS},0,kLtFpalu,"fcmp","1,2",1) + +/* MOP_dcmpqri -- AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_dcmpqri, {mopdRegCCD, mopdReg64FS,mopdFPZeroImm8},0,kLtFpalu,"fcmp","1,2",1) +/* MOP_dcmpqrr */ +DEFINE_MOP(MOP_dcmpqrr, {mopdRegCCD, mopdReg64FS,mopdReg64FS},0,kLtFpalu,"fcmp","1,2",1) + +/* AARCH64 Integer COMPARES */ +/* MOP_wcmpri -- AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_wcmpri, {mopdRegCCD, 
mopdReg32IS,mopdImm12},0,kLtAlu,"cmp","1,2",1) +/* MOP_wcmprr -- register, shifted register, AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_wcmprr, {mopdRegCCD, mopdReg32IS,mopdReg32IS},0,kLtAlu,"cmp","1,2",1) +/* MOP_xcmpri -- AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_xcmpri, {mopdRegCCD, mopdReg64IS,mopdImm12},0,kLtAlu,"cmp","1,2",1) +/* MOP_xcmprr -- register, shifted register, AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_xcmprr, {mopdRegCCD, mopdReg64IS,mopdReg64IS},0,kLtAlu,"cmp","1,2",1) + +/* MOP_wccmpriic -- AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_wccmpriic, {mopdRegCCD, mopdReg32IS,mopdImm5,mopdImm4,mopdCond},0,kLtAlu,"ccmp","1,2,3,4",1) +/* MOP_wccmprric -- register, shifted register, AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_wccmprric, {mopdRegCCD, mopdReg32IS,mopdReg32IS,mopdImm4,mopdCond},0,kLtAlu,"ccmp","1,2,3,4",1) +/* MOP_xccmpriic -- AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_xccmpriic, {mopdRegCCD, mopdReg64IS,mopdImm5,mopdImm4,mopdCond},0,kLtAlu,"ccmp","1,2,3,4",1) +/* MOP_xccmprric -- register, shifted register, AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_xccmprric, {mopdRegCCD, mopdReg64IS,mopdReg64IS,mopdImm4,mopdCond},0,kLtAlu,"ccmp","1,2,3,4",1) + +/* MOP_wcmnri -- AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_wcmnri, {mopdRegCCD, mopdReg32IS,mopdImm12},0,kLtAlu,"cmn","1,2",1) +/* MOP_wcmnrr -- register, shifted register, AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_wcmnrr, {mopdRegCCD, mopdReg32IS,mopdReg32IS},0,kLtAlu,"cmn","1,2",1) +/* MOP_xcmnri -- AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_xcmnri, {mopdRegCCD, mopdReg64IS,mopdImm12},0,kLtAlu,"cmn","1,2",1) +/* MOP_xcmnrr -- register, shifted register, AArch64 cmp has no dest operand */ +DEFINE_MOP(MOP_xcmnrr, {mopdRegCCD, mopdReg64IS,mopdReg64IS},0,kLtAlu,"cmn","1,2",1) + +/* AArch64 branches */ +/* MOP_xbr -- branch to register */ +DEFINE_MOP(MOP_xbr, {mopdReg64IS},ISUNCONDBRANCH,kLtBranch,"br","0",1) +/* MOP_Tbbuncond */ +DEFINE_MOP(MOP_xuncond, {mopdLabel},ISUNCONDBRANCH,kLtBranch,"b","0",1) + +/* MOP_wcbnz --- Compare and Branch on Nonzero */ +DEFINE_MOP(MOP_wcbnz, {mopdReg32IS,mopdLabel},ISCONDBRANCH,kLtBranch,"cbnz","0,1",1) +/* MOP_xcbnz */ +DEFINE_MOP(MOP_xcbnz, {mopdReg64IS,mopdLabel},ISCONDBRANCH,kLtBranch,"cbnz","0,1",1) +/* MOP_wcbz --- Compare and Branch on zero */ +DEFINE_MOP(MOP_wcbz, {mopdReg32IS,mopdLabel},ISCONDBRANCH,kLtBranch,"cbz","0,1",1) +/* MOP_xcbz */ +DEFINE_MOP(MOP_xcbz, {mopdReg64IS,mopdLabel},ISCONDBRANCH,kLtBranch,"cbz","0,1",1) + +/* MOP_wtbnz --- Test bit and Branch if Nonzero */ +DEFINE_MOP(MOP_wtbnz, {mopdReg32IS,mopdImm8,mopdLabel},ISCONDBRANCH,kLtBranch,"tbnz","0,1,2",1) +/* MOP_xtbnz */ +DEFINE_MOP(MOP_xtbnz, {mopdReg64IS,mopdImm8,mopdLabel},ISCONDBRANCH,kLtBranch,"tbnz","0,1,2",1) +/* MOP_wtbz --- Test bit and Branch if Zero */ +DEFINE_MOP(MOP_wtbz, {mopdReg32IS,mopdImm8,mopdLabel},ISCONDBRANCH,kLtBranch,"tbz","0,1,2",1) +/* MOP_xtbz */ +DEFINE_MOP(MOP_xtbz, {mopdReg64IS,mopdImm8,mopdLabel},ISCONDBRANCH,kLtBranch,"tbz","0,1,2",1) + +/* AARCH64 STORES */ +/* MOP_wstrb -- Store Register Byte */ +DEFINE_MOP(MOP_wstrb, {mopdReg32IS,mopdMem8D},ISSTORE|CANTHROW,kLtStore1,"strb","0,1",1) +/* MOP_wstrh -- Store Register Halfword */ +DEFINE_MOP(MOP_wstrh, {mopdReg32IS,mopdMem16D},ISSTORE|CANTHROW,kLtStore1,"strh","0,1",1) +/* MOP_wstr -- Store Register Word */ +DEFINE_MOP(MOP_wstr, {mopdReg32IS,mopdMem32D},ISSTORE|CANTHROW,kLtStore1,"str","0,1",1) +/* MOP_xstr -- Store Register Double word */ 
+DEFINE_MOP(MOP_xstr, {mopdReg64IS,mopdMem64D},ISSTORE|CANTHROW,kLtStore2,"str","0,1",1) + +/* MOP_sstr -- Store Register SIMD/FP Float */ +DEFINE_MOP(MOP_sstr, {mopdReg32FS,mopdMem32D},ISSTORE|CANTHROW,kLtStore2,"str","0,1",1) +/* MOP_dstr -- Store Register SIMD/FP Double */ +DEFINE_MOP(MOP_dstr, {mopdReg64FS,mopdMem64D},ISSTORE|CANTHROW,kLtStore3plus,"str","0,1",1) + +/* AArch64 STP. */ +/* MOP_wstp */ +DEFINE_MOP(MOP_wstp, {mopdReg32IS,mopdReg32IS,mopdMem32D},ISSTORE|ISSTOREPAIR|CANTHROW,kLtStore2,"stp","0,1,2",1) +/* MOP_xstp */ +DEFINE_MOP(MOP_xstp, {mopdReg64IS,mopdReg64IS,mopdMem64D},ISSTORE|ISSTOREPAIR|CANTHROW,kLtStore3plus,"stp","0,1,2",1) +/* AArch64 does not define STPSW. It has no practical value. */ +/* MOP_sstp */ +DEFINE_MOP(MOP_sstp, {mopdReg32FS,mopdReg32FS,mopdMem32D},ISSTORE|ISSTOREPAIR|CANTHROW,kLtAdvsimdMulQ,"stp","0,1,2",1) +/* MOP_dstp */ +DEFINE_MOP(MOP_dstp, {mopdReg64FS,mopdReg64FS,mopdMem64D},ISSTORE|ISSTOREPAIR|CANTHROW,kLtAdvsimdMulQ,"stp","0,1,2",1) + +/* AARCH64 Store with Release semantics */ +/* MOP_wstlrb -- Store-Release Register Byte */ +DEFINE_MOP(MOP_wstlrb, {mopdReg32IS,mopdMem8D},ISSTORE|HASRELEASE|CANTHROW,kLtStore1,"stlrb","0,1",1) +/* MOP_wstlrh -- Store-Release Register Halfword */ +DEFINE_MOP(MOP_wstlrh, {mopdReg32IS,mopdMem16D},ISSTORE|HASRELEASE|CANTHROW,kLtStore1,"stlrh","0,1",1) +/* MOP_wstlr -- Store-Release Register Word */ +DEFINE_MOP(MOP_wstlr, {mopdReg32IS,mopdMem32D},ISSTORE|HASRELEASE|CANTHROW,kLtStore1,"stlr","0,1",1) +/* MOP_xstlr -- Store-Release Register Double word */ +DEFINE_MOP(MOP_xstlr, {mopdReg64IS,mopdMem64D},ISSTORE|HASRELEASE|CANTHROW,kLtStore1,"stlr","0,1",1) + +/* AARCH64 Store exclusive with/without release semantics */ +DEFINE_MOP(MOP_wstxrb, {mopdReg32ID,mopdReg32IS,mopdMem8D, },ISSTORE|ISATOMIC|CANTHROW,kLtStore1,"stxrb","0,1,2",1) +DEFINE_MOP(MOP_wstxrh, {mopdReg32ID,mopdReg32IS,mopdMem16D},ISSTORE|ISATOMIC|CANTHROW,kLtStore1,"stxrh","0,1,2",1) +DEFINE_MOP(MOP_wstxr, {mopdReg32ID,mopdReg32IS,mopdMem32D},ISSTORE|ISATOMIC|CANTHROW,kLtStore1,"stxr","0,1,2",1) +DEFINE_MOP(MOP_xstxr, {mopdReg32ID,mopdReg64IS,mopdMem64D},ISSTORE|ISATOMIC|CANTHROW,kLtStore1,"stxr","0,1,2",1) + +DEFINE_MOP(MOP_wstlxrb,{mopdReg32ID,mopdReg32IS,mopdMem8D, },ISSTORE|ISATOMIC|HASRELEASE|CANTHROW,kLtStore1,"stlxrb","0,1,2",1) +DEFINE_MOP(MOP_wstlxrh,{mopdReg32ID,mopdReg32IS,mopdMem16D},ISSTORE|ISATOMIC|HASRELEASE|CANTHROW,kLtStore1,"stlxrh","0,1,2",1) +DEFINE_MOP(MOP_wstlxr, {mopdReg32ID,mopdReg32IS,mopdMem32D},ISSTORE|ISATOMIC|HASRELEASE|CANTHROW,kLtStore1,"stlxr","0,1,2",1) +DEFINE_MOP(MOP_xstlxr, {mopdReg32ID,mopdReg64IS,mopdMem64D},ISSTORE|ISATOMIC|HASRELEASE|CANTHROW,kLtStore1,"stlxr","0,1,2",1) + +DEFINE_MOP(MOP_wstlxp, {mopdReg32ID,mopdReg32IS,mopdReg32IS,mopdMem64D},ISSTORE|ISSTOREPAIR|ISATOMIC|HASRELEASE|CANTHROW,kLtStore1,"stlxp","0,1,2,3",1) +DEFINE_MOP(MOP_xstlxp, {mopdReg32ID,mopdReg64IS,mopdReg64IS,mopdMem64D},ISSTORE|ISSTOREPAIR|ISATOMIC|HASRELEASE|CANTHROW,kLtStore1,"stlxp","0,1,2,3",1) + +/* Memory barriers */ +/* MOP_dmb_ishld */ +DEFINE_MOP(MOP_dmb_ishld, {}, HASACQUIRE|ISDMB,kLtBranch, "dmb\tishld", "",1) +/* MOP_dmb_ishst */ +DEFINE_MOP(MOP_dmb_ishst, {}, HASRELEASE|ISDMB,kLtBranch, "dmb\tishst", "",1) +/* MOP_dmb_ish */ +DEFINE_MOP(MOP_dmb_ish, {}, HASACQUIRE|HASRELEASE|ISDMB,kLtBranch, "dmb\tish", "",1) + +/* + * MOP_clinit + * will be emit to four instructions in a row: + * adrp xd, :got:__classinfo__Ljava_2Futil_2Fconcurrent_2Fatomic_2FAtomicInteger_3B + * ldr xd, 
[xd,#:got_lo12:__classinfo__Ljava_2Futil_2Fconcurrent_2Fatomic_2FAtomicInteger_3B] + * ldr xd, [xd,#112] + * ldr wzr, [xd] + */ +DEFINE_MOP(MOP_clinit, {mopdReg64ID,mopdLiteral},ISATOMIC|CANTHROW,kLtClinit,"intrinsic_clinit","0,1",4) + +/* + * MOP_counter + * will be emit to five instructions in a row: + * adrp x1, :got:__profile_table + idx + * ldr w17, [x1,#:got_lo12:__profile_table] + * add w17, w17, #1 + * str w17,[x1,,#:got_lo12:__profile_table] + */ +DEFINE_MOP(MOP_counter, {mopdReg64ID,mopdLiteral},ISATOMIC|CANTHROW,kLtClinit,"intrinsic_counter","0,1", 4) + +/* + * will be emit to two instrunctions in a row: + * ldr wd, [xs] // xd and xs should be differenct register + * ldr wd, [xd] + */ +DEFINE_MOP(MOP_lazy_ldr, {mopdReg32ID,mopdReg64IS},ISATOMIC|CANTHROW,kLtClinitTail,"intrinsic_lazyload","0,1",2) + +/* + * will be emit to three instrunctions in a row: + * adrp xd, :got:__staticDecoupleValueOffset$$xxx+offset + * ldr xd, [xd,#:got_lo12:__staticDecoupleValueOffset$$xx+offset] + * ldr xzr, [xd] + */ +DEFINE_MOP(MOP_lazy_ldr_static, {mopdReg64ID,mopdLiteral},ISATOMIC|CANTHROW,kLtAdrpLdr,"intrinsic_lazyloadstatic","0,1",3) + +/* A pseudo instruction followed MOP_lazy_ldr, to make sure xs and xd be allocated to different physical registers. */ +DEFINE_MOP(MOP_lazy_tail, {mopdReg32IS,mopdReg64IS},0,kLtUndef,"pseudo_lazy_tail","",0) + +/* will be emit to two instructions in a row: + * adrp xd, _PTR__cinf_Ljava_2Flang_2FSystem_3B + * ldr xd, [xd, #:lo12:_PTR__cinf_Ljava_2Flang_2FSystem_3B] + * MOP_adrp_ldr + */ +DEFINE_MOP(MOP_adrp_ldr, {mopdReg64ID, mopdLiteral},ISATOMIC|CANTHROW,kLtAdrpLdr,"intrinsic_adrpldr","0,1",2) + +/* will be emit to two instructions in a row: + * adrp xd, label + * add xd, xd, #:lo12:label + */ +DEFINE_MOP(MOP_adrp_label, {mopdReg64ID, mopdImm64},0,kLtAlu,"intrinsic_adrplabel","0,1", 2) + +/* + * will be emit to three instrunctions in a row: + * adrp xd, :got:__arrayClassCacheTable$$xxx+offset + * ldr xd, [xd,#:got_lo12:__arrayClassCacheTable$$xx+offset] + * ldr xzr, [xd] + */ +DEFINE_MOP(MOP_arrayclass_cache_ldr, {mopdReg64ID,mopdLiteral},ISATOMIC|CANTHROW,kLtAdrpLdr,"intrinsic_loadarrayclass","0,1",3) + +/* + * ldr x17, [xs,#112] + * ldr wzr, [x17] + */ +DEFINE_MOP(MOP_clinit_tail, {mopdReg64IS},ISATOMIC|CANTHROW,kLtClinitTail,"intrinsic_clinit_tail","0",2) + +/* + * intrinsic Unsafe.getAndAddInt + * intrinsic_get_add_int w0, xt, wt, ws, x1, x2, w3, label + * add xt, x1, x2 + * label: + * ldaxr w0, [xt] + * add wt, w0, w3 + * stlxr ws, wt, [xt] + * cbnz ws, label + */ +DEFINE_MOP(MOP_get_and_addI, {mopdReg32ID,mopdReg64ID,mopdReg32ID,mopdReg32ID,mopdReg64IS,mopdReg64IS,mopdReg32IS,mopdLabel},HASLOOP|CANTHROW,kLtBranch,"intrinsic_get_add_int","",5) +/* + * intrinsic Unsafe.getAndAddLong + * intrinsic_get_add_long x0, xt, xs, ws, x1, x2, x3, ws, label + * add xt, x1, x2 + * label: + * ldaxr x0, [xt] + * add xs, x0, x3 + * stlxr ws, x2, [xt] + * cbnz ws, label + */ +DEFINE_MOP(MOP_get_and_addL, {mopdReg64ID,mopdReg64ID,mopdReg64ID,mopdReg32ID,mopdReg64IS,mopdReg64IS,mopdReg64IS,mopdLabel},HASLOOP|CANTHROW,kLtBranch,"intrinsic_get_add_long","",5) + +/* + * intrinsic Unsafe.getAndSetInt + * intrinsic_get_set_int w0, xt, x1, x2, w3, label + * add xt, x1, x2 + * label: + * ldaxr w0, [xt] + * stlxr w2, w3, [xt] + * cbnz w2, label + */ +DEFINE_MOP(MOP_get_and_setI, {mopdReg32ID,mopdReg64ID,mopdReg32ID,mopdReg64IS,mopdReg64IS,mopdReg32IS,mopdLabel},HASLOOP|CANTHROW,kLtBranch,"intrinsic_get_set_int","0,1,2,3,4",4) +/* + * intrinsic Unsafe.getAndSetLong + * 
intrinsic_get_set_long x0, x1, x2, x3, label + * add xt, x1, x2 + * label: + * ldaxr x0, [xt] + * stlxr w2, x3, [xt] + * cbnz w2, label + */ +DEFINE_MOP(MOP_get_and_setL, {mopdReg64ID,mopdReg64ID,mopdReg32ID,mopdReg64IS,mopdReg64IS,mopdReg64IS,mopdLabel},HASLOOP|CANTHROW,kLtBranch,"intrinsic_get_set_long","0,1,2,3,4",4) + +/* + * intrinsic Unsafe.compareAndSwapInt + * intrinsic_compare_swap_int x0, xt, ws, x1, x2, w3, w4, lable1, label2 + * add xt, x1, x2 + * label1: + * ldaxr ws, [xt] + * cmp ws, w3 + * b.ne label2 + * stlxr ws, w4, [xt] + * cbnz ws, label1 + * label2: + * cset x0, eq + */ +DEFINE_MOP(MOP_compare_and_swapI, {mopdReg64ID,mopdReg64ID,mopdReg32ID,mopdReg64IS,mopdReg64IS,mopdReg32IS,mopdReg32IS,mopdLabel,mopdLabel},HASLOOP|CANTHROW,kLtBranch,"intrinsic_compare_swap_int","0,1,2,3,4,5,6",7) +/* + * intrinsic Unsafe.compareAndSwapLong + * intrinsic_compare_swap_long x0, xt, xs, x1, x2, x3, x4, lable1, label2 + * add xt, x1, x2 + * label1: + * ldaxr xs, [xt] + * cmp xs, x3 + * b.ne label2 + * stlxr ws, x4, [xt] + * cbnz ws, label1 + * label2: + * cset x0, eq + */ +DEFINE_MOP(MOP_compare_and_swapL, {mopdReg64ID,mopdReg64ID,mopdReg64ID,mopdReg64IS,mopdReg64IS,mopdReg64IS,mopdReg64IS,mopdLabel,mopdLabel},HASLOOP|CANTHROW,kLtBranch,"intrinsic_compare_swap_long","0,1,2,3,4,5,6",7) + +/* + * intrinsic String.indexOf(Ljava/lang/String;)I + * intrinsic_string_indexof w0, x1, w2, x3, w4, x5, x6, x7, x8, x9, w10, Label.FIRST_LOOP, Label.STR2_NEXT, Label.STR1_LOOP, Label.STR1_NEXT, Label.LAST_WORD, Label.NOMATCH, Label.RET + * cmp w4, w2 + * b.gt .Label.NOMATCH + * sub w2, w2, w4 + * sub w4, w4, #8 + * mov w10, w2 + * uxtw x4, w4 + * uxtw x2, w2 + * add x3, x3, x4 + * add x1, x1, x2 + * neg x4, x4 + * neg x2, x2 + * ldr x5, [x3,x4] + * .Label.FIRST_LOOP: + * ldr x7, [x1,x2] + * cmp x5, x7 + * b.eq .Label.STR1_LOOP + * .Label.STR2_NEXT: + * adds x2, x2, #1 + * b.le .Label.FIRST_LOOP + * b .Label.NOMATCH + * .Label.STR1_LOOP: + * adds x8, x4, #8 + * add x9, x2, #8 + * b.ge .Label.LAST_WORD + * .Label.STR1_NEXT: + * ldr x6, [x3,x8] + * ldr x7, [x1,x9] + * cmp x6, x7 + * b.ne .Label.STR2_NEXT + * adds x8, x8, #8 + * add x9, x9, #8 + * b.lt .Label.STR1_NEXT + * .Label.LAST_WORD: + * ldr x6, [x3] + * sub x9, x1, x4 + * ldr x7, [x9,x2] + * cmp x6, x7 + * b.ne .Label.STR2_NEXT + * add w0, w10, w2 + * b .Label.RET + * .Label.NOMATCH: + * mov w0, #-1 + * .Label.RET: + */ +DEFINE_MOP(MOP_string_indexof, {mopdReg32ID,mopdReg64IDS,mopdReg32IDS,mopdReg64IDS,mopdReg32IDS,mopdReg64ID,mopdReg64ID,mopdReg64ID,mopdReg64ID,mopdReg64ID,mopdReg32ID,mopdLabel,mopdLabel,mopdLabel,mopdLabel,mopdLabel,mopdLabel,mopdLabel},HASLOOP|CANTHROW,kLtBranch,"intrinsic_string_indexof","0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",36) + +/* MOP_tail_call_opt_xbl -- branch without link (call); this is a special definition */ +DEFINE_MOP(MOP_tail_call_opt_xbl, {mopdFuncName,mopdLISTS},CANTHROW,kLtBranch,"b","0", 1) +/* MOP_tail_call_opt_xblr -- branch without link (call) to register; this is a special definition */ +DEFINE_MOP(MOP_tail_call_opt_xblr, {mopdReg64IS,mopdLISTS},CANTHROW,kLtBranch,"br","0", 1) + +/* MOP_pseudo_param_def_x, */ +DEFINE_MOP(MOP_pseudo_param_def_x, {mopdReg64ID},0,kLtUndef,"//MOP_pseudo_param_def","0", 0) + +/* MOP_pseudo_param_def_w, */ +DEFINE_MOP(MOP_pseudo_param_def_w, {mopdReg32ID},0,kLtUndef,"//MOP_pseudo_param_def","0", 0) + +/* MOP_pseudo_param_def_d, */ +DEFINE_MOP(MOP_pseudo_param_def_d, {mopdReg64FD},0,kLtUndef,"//MOP_pseudo_param_def","0", 0) + +/* MOP_pseudo_param_def_s, */ 
+DEFINE_MOP(MOP_pseudo_param_def_s, {mopdReg32FD},0,kLtUndef,"//MOP_pseudo_param_def","0", 0) + +/* MOP_pseudo_param_store_x, */ +DEFINE_MOP(MOP_pseudo_param_store_x, {mopdMem64D},0,kLtUndef,"//MOP_pseudo_param_store_x","0", 0) + +/* MOP_pseudo_param_store_w, */ +DEFINE_MOP(MOP_pseudo_param_store_w, {mopdMem32D},0,kLtUndef,"//MOP_pseudo_param_store_w","0", 0) + +/* MOP_pseudo_ref_init_x, */ +DEFINE_MOP(MOP_pseudo_ref_init_x, {mopdMem64D},0,kLtUndef,"//MOP_pseudo_ref_init_x","0", 0) + +/* MOP_pseudo_ret_int, */ +DEFINE_MOP(MOP_pseudo_ret_int, {mopdReg64IS},0,kLtUndef,"//MOP_pseudo_ret_int","", 0) + +/* MOP_pseudo_ret_float, */ +DEFINE_MOP(MOP_pseudo_ret_float, {mopdReg64FS},0,kLtUndef,"//MOP_pseudo_ret_float","", 0) + +/* When exception occurs, R0 and R1 may be defined by runtime code. */ +/* MOP_pseudo_eh_def_x, */ +DEFINE_MOP(MOP_pseudo_eh_def_x, {mopdReg64ID},0,kLtUndef,"//MOP_pseudo_eh_def_x","0", 0) + +/* for comments */ +/* MOP_comment */ +DEFINE_MOP(MOP_comment, {mopdSTRING},0,kLtUndef,"//","0", 0) +/*MOP_nop */ +DEFINE_MOP(MOP_nop, {},0,kLtAlu,"nop","", 0) + + +/* A pseudo instruction that used for seperating dependence graph. */ +/* MOP_pseudo_dependence_seperator, */ +DEFINE_MOP(MOP_pseudo_dependence_seperator, {},0,kLtUndef,"//MOP_pseudo_dependence_seperator","0", 0) + + +/* A pseudo instruction that used for replacing MOP_clinit_tail after clinit merge in scheduling. */ +/* MOP_pseudo_none, */ +DEFINE_MOP(MOP_pseudo_none, {},0,kLtUndef,"//MOP_pseudo_none","0", 0) + +/* end of AArch64 instructions */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_memlayout.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_memlayout.h new file mode 100644 index 0000000000000000000000000000000000000000..cd259cef13731e7c048e5beaa5011d69d73ca3ff --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_memlayout.h @@ -0,0 +1,209 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_MEMLAYOUT_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_MEMLAYOUT_H + +#include "memlayout.h" +#include "riscv64_abi.h" + +namespace maplebe { +class AArch64SymbolAlloc : public SymbolAlloc { + public: + AArch64SymbolAlloc() = default; + + ~AArch64SymbolAlloc() = default; + + void SetRegisters(AArch64reg r0, AArch64reg r1) { + reg0 = r0; + reg1 = r1; + } + + inline bool IsRegister() { + return reg0 != kRinvalid; + } + + private: + AArch64reg reg0 = kRinvalid; + AArch64reg reg1 = kRinvalid; +}; + +/* + * On AArch64, stack frames are structured as follows: + * + * The stack grows downward -- full descending (SP points + * to a filled slot). + * + * Any of the parts of a frame is optional, i.e., it is + * possible to write a caller-callee pair in such a way + * that the particular part is absent in the frame. 
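+ *
+ * Two alternative layouts, V1 and V2, are sketched below; they differ mainly
+ * in where the PREV_FP/PREV_LR pair and the callee-saved area are placed
+ * relative to the local variables.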
+ * + * Before a call is made, the frame looks like: + * | | + * ||----------------------------| + * | args passed on the stack | (we call them up-formals) + * ||----------------------------|<- Stack Pointer + * | | + * + * V1. + * Right after a call is made + * | | + * ||----------------------------| + * | args passed on the stack | + * ||----------------------------|<- Stack Pointer + * | PREV_FP, PREV_LR | + * ||----------------------------|<- Frame Pointer + * + * After the prologue has run, + * | | + * ||----------------------------| + * | args passed on the stack | + * ||----------------------------| + * | PREV_FP, PREV_LR | + * ||----------------------------|<- Frame Pointer + * | callee-saved registers | + * ||----------------------------| + * | empty space. should have | + * | at least 16-byte alignment | + * ||----------------------------| + * | local variables | + * ||----------------------------| + * | variable-sized local vars | + * | (VLAs) | + * ||----------------------------|<- Stack Pointer + * + * callee-saved registers include + * 1. R19-R28 + * 2. R8 if return value needs to be returned + * thru memory and callee wants to use R8 + * 3. we don't need to save R19 if it is used + * as base register for PIE. + * 4. V8-V15 + * + * V2. (this way, we may be able to save + * on SP modifying instruction) + * Right after a call is made + * | | + * ||----------------------------| + * | args passed on the stack | + * ||----------------------------|<- Stack Pointer + * | | + * | empty space | + * | | + * ||----------------------------| + * | PREV_FP, PREV_LR | + * ||----------------------------|<- Frame Pointer + * + * After the prologue has run, + * | | + * ||----------------------------| + * | args passed on the stack | + * ||----------------------------| + * | callee-saved registers | + * | including those used for | + * | parameter passing | + * ||----------------------------| + * | empty space. 
should have | + * | at least 16-byte alignment | + * ||----------------------------| + * | local variables | + * ||----------------------------| + * | PREV_FP, PREV_LR | + * ||----------------------------|<- Frame Pointer + * | variable-sized local vars | + * | (VLAs) | + * ||----------------------------| + * | args to pass through stack | + * ||----------------------------| + */ +class AArch64MemLayout : public MemLayout { + public: + AArch64MemLayout(BECommon &b, MIRFunction &f, MapleAllocator &mallocator) + : MemLayout(b, f, mallocator) {} + + ~AArch64MemLayout() override = default; + + /* + * Returns stack space required for a call + * which is used to pass arguments that cannot be + * passed through registers + */ + uint32 ComputeStackSpaceRequirementForCall(StmtNode &stmt, int32 &aggCopySize, bool isIcall) override; + + void LayoutStackFrame(int32 &structCopySize, int32 &maxParmStackSize) override; + + void AssignSpillLocationsToPseudoRegisters() override; + + SymbolAlloc *AssignLocationToSpillReg(regno_t vrNum) override; + + int32 StackFrameSize(); + + int32 RealStackFrameSize(); + + const MemSegment &locals() const { + return segLocals; + } + + int32 GetSizeOfSpillReg() const { + return segSpillReg.GetSize(); + } + + int32 GetSizeOfLocals() const { + return segLocals.GetSize(); + } + + void SetSizeOfGRSaveArea(int32 sz) { + segGrSaveArea.SetSize(sz); + } + + int32 GetSizeOfGRSaveArea() const { + return segGrSaveArea.GetSize(); + } + + inline void SetSizeOfVRSaveArea(int32 sz) { + segVrSaveArea.SetSize(sz); + } + + int32 GetSizeOfVRSaveArea() const { + return segVrSaveArea.GetSize(); + } + + int32 GetSizeOfRefLocals() { + return segRefLocals.GetSize(); + } + + int32 GetRefLocBaseLoc() const; + int32 GetGRSaveAreaBaseLoc(); + int32 GetVRSaveAreaBaseLoc(); + + private: + MemSegment segRefLocals = MemSegment(kMsRefLocals); + /* callee saved register R19-R28 (10) */ + MemSegment segSpillReg = MemSegment(kMsSpillReg); + MemSegment segLocals = MemSegment(kMsLocals); /* these are accessed via Frame Pointer */ + MemSegment segGrSaveArea = MemSegment(kMsGrSaveArea); + MemSegment segVrSaveArea = MemSegment(kMsVrSaveArea); + int32 fixStackSize = 0; + void SetSizeAlignForTypeIdx(uint32 typeIdx, uint32 &size, uint32 &align) const; + void SetSegmentSize(AArch64SymbolAlloc &symbolAlloc, MemSegment &segment, uint32 typeIdx); + void LayoutVarargParams(); + void LayoutFormalParams(); + void LayoutActualParams(); + void LayoutLocalVariales(std::vector &tempVar, std::vector &returnDelays); + void LayoutEAVariales(std::vector &tempVar); + void LayoutReturnRef(std::vector &returnDelays, int32 &structCopySize, int32 &maxParmStackSize); +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_MEMLAYOUT_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_offset_adjust.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_offset_adjust.h new file mode 100644 index 0000000000000000000000000000000000000000..dd4e148d7a95c154e6fae52f219696fa4f135a85 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_offset_adjust.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_OFFSET_ADJUST_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_OFFSET_ADJUST_H + +#include "offset_adjust.h" +#include "riscv64_cgfunc.h" + +namespace maplebe { +using namespace maple; + +class AArch64FPLROffsetAdjustment : public FPLROffsetAdjustment { + public: + explicit AArch64FPLROffsetAdjustment(CGFunc &func) : FPLROffsetAdjustment(func) {} + + ~AArch64FPLROffsetAdjustment() override = default; + + void Run() override; + + private: + void AdjustmentOffsetForOpnd(Insn &insn, AArch64CGFunc &aarchCGFunc); + void AdjustmentOffsetForFPLR(); +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_OFFSET_ADJUST_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_operand.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_operand.h new file mode 100644 index 0000000000000000000000000000000000000000..9d6488c10b542c6cc563a3eee6f2cc055c3f3964 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_operand.h @@ -0,0 +1,1039 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_OPERAND_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_OPERAND_H + +#include +#include +#include +#include +#include "riscv64_isa.h" +#include "operand.h" +#include "cg.h" +#include "riscv64_immediate.h" +#include "emit.h" +#include "common_utils.h" + + +namespace maplebe { +using namespace maple; + +class AArch64RegOperand : public RegOperand { + public: + AArch64RegOperand(regno_t regNO, uint32 size, RegType kind, uint32 flg = 0) + : RegOperand(regNO, size, kind), flag(flg) { + ASSERT(kind != kRegTyUndef, "Reg type must be specified"); + } + + ~AArch64RegOperand() override = default; + + void SetRefField(bool newIsRefField) { + isRefField = newIsRefField; + } + + bool IsInvalidRegister() const override { + return (GetRegisterNumber() == AArch64reg::kRinvalid); + } + + bool IsPhysicalRegister() const override { + return AArch64isa::IsPhysicalRegister(GetRegisterNumber()); + } + + bool IsVirtualRegister() const override { + return !IsPhysicalRegister(); + } + + bool IsBBLocalVReg() const override { + return IsVirtualRegister() && RegOperand::IsBBLocalVReg(); + } + + bool IsSaveReg(MIRType &ty, BECommon &beCommon) const override; + + static AArch64RegOperand &Get32bitZeroRegister() { + return zero32; + } + + static AArch64RegOperand &Get64bitZeroRegister() { + return zero64; + } + + static AArch64RegOperand &GetZeroRegister(uint32 bitLen) { + /* + * It is possible to have a bitLen < 32, eg stb. + * Set it to 32 if it is less than 32. 
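+   * For example, a byte store such as  strb wzr, [x0]  still names wzr, the
+   * 32-bit zero register, so the operand length is widened to 32 bits here.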
+ */ + if (bitLen < k32BitSize) { + bitLen = k32BitSize; + } + ASSERT((bitLen == k32BitSize || bitLen == k64BitSize), "illegal bit length = %d", bitLen); + return (bitLen == k32BitSize) ? Get32bitZeroRegister() : Get64bitZeroRegister(); + } + + bool IsZeroRegister() const override { + return GetRegisterNumber() == RZR; + } + + Operand *Clone(MemPool &memPool) const override { + return memPool.Clone(*this); + } + + bool operator==(const AArch64RegOperand &opnd) const; + + bool operator<(const AArch64RegOperand &opnd) const; + + void Emit(Emitter &emitter, const OpndProp *opndProp) const override; + void Dump() const override { + std::array prims = { "U", "R", "V", "C", "X", "Vra" }; + std::array classes = { "[U]", "[I]", "[F]", "[CC]", "[X87]", "[Vra]" }; + bool isVirtual = IsVirtualRegister(); + ASSERT(regType < kRegTyLast, "unexpected regType"); + regno_t reg = isVirtual ? regNO : (regNO - 1); + LogInfo::MapleLogger() << (isVirtual ? "vreg:" : " reg:") << prims[regType] << reg + << " class: " << classes[regType] << " validBitNum: [" + << static_cast(validBitsNum) << "]"; + } + + bool IsSPOrFP() const override; + + private: + static AArch64RegOperand zero64; + static AArch64RegOperand zero32; + bool isRefField = false; + uint32 flag; +}; + +/* + * http://stackoverflow.com/questions/30904718/range-of-immediate-values-in-armv8-a64-assembly + * + * Unlike A32's "flexible second operand", there is no common + * immediate format in A64. For immediate-operand data-processing + * instructions (ignoring the boring and straightforward ones like shifts), + * + * 1. Arithmetic instructions (add{s}, sub{s}, cmp, cmn) take + * a 12-bit unsigned immediate with an optional 12-bit left shift. + * 2. Move instructions (movz, movn, movk) take a 16-bit immediate + * optionally shifted to any 16-bit-aligned position within the register. + * 3. Address calculations (adr, adrp) take a 21-bit signed immediate, + * although there's no actual syntax to specify it directly - to do + * so you'd have to resort to assembler expression trickery to generate + * an appropriate "label". + * 4. Logical instructions (and{s}, orr, eor, tst) take a "bitmask immediate", + * which I'm not sure I can even explain, so I'll just quote the + * mind-bogglingly complicated definition: + * "Such an immediate is a 32-bit or 64-bit pattern viewed as a vector of + * identical elements of size e = 2, 4, 8, 16, 32, or 64 bits. Each element + * contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, + * rotated by 0 to e-1 bits. This mechanism can generate 5,334 unique + * 64-bit patterns (as 2,667 pairs of pattern and their bitwise inverse)." 
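+ *    For example, 0x00ff00ff00ff00ff is a valid bitmask immediate (element
+ *    size e = 16, each element a single run of eight 1-bits, rotation 0), so
+ *      and x0, x1, #0x00ff00ff00ff00ff
+ *    encodes in one instruction, whereas a value such as 0xabcde is not a
+ *    bitmask immediate and has to be materialized into a register first
+ *    (e.g. with movz/movk) before the logical operation.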
+ */ +class AArch64ImmOperand : public ImmOperand { + public: + AArch64ImmOperand(int64 val, uint32 size, bool isSigned, VaryType varyType = kNotVary, bool isFmov = false) + : ImmOperand(val, size, isSigned, varyType), isFmov(isFmov) {} + + ~AArch64ImmOperand() override = default; + + Operand *Clone(MemPool &memPool) const override { + return memPool.Clone(*this); + } + + bool IsInBitSize(uint8 size, uint8 nLowerZeroBits) const override { + /* mask1 is a 64bits number that is all 1 shifts left size bits */ + const uint64 mask1 = 0xffffffffffffffffUL << size; + /* mask2 is a 64 bits number that nlowerZeroBits are all 1, higher bits aro all 0 */ + uint64 mask2 = (static_cast(1) << static_cast(nLowerZeroBits)) - 1UL; + return (mask2 & value) == 0UL && (mask1 & ((static_cast(value)) >> nLowerZeroBits)) == 0UL; + } + + bool IsBitmaskImmediate() const { + ASSERT(!IsZero(), " 0 is reserved for bitmask immediate"); + ASSERT(!IsAllOnes(), " -1 is reserved for bitmask immediate"); + return maplebe::IsBitmaskImmediate(static_cast(value), static_cast(size)); + } + + bool IsBitmaskImmediate(uint32 destSize) { + ASSERT(!IsZero(), " 0 is reserved for bitmask immediate"); + ASSERT(!IsAllOnes(), " -1 is reserved for bitmask immediate"); + return maplebe::IsBitmaskImmediate(static_cast(value), static_cast(destSize)); + } + + bool IsSingleInstructionMovable() const override { + return (IsMoveWidableImmediate(static_cast(value), static_cast(size)) || + IsMoveWidableImmediate(~static_cast(value), static_cast(size)) || + IsBitmaskImmediate()); + } + + bool IsSingleInstructionMovable(uint32 destSize) { + return (IsMoveWidableImmediate(static_cast(value), static_cast(destSize)) || + IsMoveWidableImmediate(~static_cast(value), static_cast(destSize)) || + IsBitmaskImmediate(destSize)); + } + + void Emit(Emitter &emitter, const OpndProp *prop) const override; + + private: + bool isFmov; +}; + +class ImmFPZeroOperand : public Operand { + public: + explicit ImmFPZeroOperand(uint32 sz) : Operand(kOpdFPZeroImmediate, uint8(sz)) {} + + ~ImmFPZeroOperand() override = default; + + static ImmFPZeroOperand *allocate(uint8 sz) { + CHECK_FATAL((sz == k32BitSize || sz == k64BitSize), "half-precession is yet to be supported"); + auto *memPool = static_cast(CG::GetCurCGFuncNoConst()->GetMemoryPool()); + ImmFPZeroOperand *inst = memPool->New(static_cast(sz)); + return inst; + } + + Operand *Clone(MemPool &memPool) const override { + return memPool.Clone(*this); + } + + void Emit(Emitter &emitter, const OpndProp *opndProp) const override { + (void)opndProp; + emitter.Emit("#0.0"); + } + + bool Less(const Operand &right) const override { + /* For different type. 
*/ + return GetKind() < right.GetKind(); + } + + void Dump() const override { + LogInfo::MapleLogger() << "imm fp" << size << ": 0.0"; + } +}; + +class AArch64OfstOperand : public OfstOperand { + public: + enum OfstType : uint8 { + kSymbolOffset, + kImmediateOffset, + kSymbolImmediateOffset, + }; + + /* only for symbol offset */ + AArch64OfstOperand(const MIRSymbol &mirSymbol, uint32 size, int32 relocs) + : OfstOperand(kOpdOffset, 0, size, true), + offsetType(kSymbolOffset), symbol(&mirSymbol), relocs(relocs) {} + /* only for Immediate offset */ + AArch64OfstOperand(int32 val, uint32 size, VaryType isVar = kNotVary) + : OfstOperand(kOpdOffset, static_cast(val), size, true, isVar), + offsetType(kImmediateOffset), symbol(nullptr), relocs(0) {} + /* for symbol and Immediate offset */ + AArch64OfstOperand(const MIRSymbol &mirSymbol, int64 val, uint32 size, int32 relocs, VaryType isVar = kNotVary) + : OfstOperand(kOpdOffset, val, size, true, isVar), + offsetType(kSymbolImmediateOffset), + symbol(&mirSymbol), + relocs(relocs) {} + + ~AArch64OfstOperand() override = default; + + Operand *Clone(MemPool &memPool) const override { + return memPool.Clone(*this); + } + + bool IsInBitSize(uint8 size, uint8 nLowerZeroBits) const override { + /* mask1 is a 64bits number that is all 1 shifts left size bits */ + const uint64 mask1 = 0xffffffffffffffffUL << size; + /* mask2 is a 64 bits number that nlowerZeroBits are all 1, higher bits aro all 0 */ + uint64 mask2 = (static_cast(1) << static_cast(nLowerZeroBits)) - 1UL; + return (mask2 & value) == 0UL && (mask1 & ((static_cast(value)) >> nLowerZeroBits)) == 0UL; + } + + bool IsSymOffset() const { + return offsetType == kSymbolOffset; + } + bool IsImmOffset() const { + return offsetType == kImmediateOffset; + } + bool IsSymAndImmOffset() const { + return offsetType == kSymbolImmediateOffset; + } + + const MIRSymbol *GetSymbol() const { + return symbol; + } + + const std::string &GetSymbolName() const { + return symbol->GetName(); + } + + int32 GetOffsetValue() const { + return GetValue(); + } + + void SetOffsetValue(int32 offVal) { + SetValue(static_cast(offVal)); + } + + void AdjustOffset(int32 delta) { + Add(static_cast(delta)); + } + + bool operator==(const AArch64OfstOperand &opnd) const { + return (offsetType == opnd.offsetType && symbol == opnd.symbol && + OfstOperand::operator==(opnd) && relocs == opnd.relocs); + } + + bool operator<(const AArch64OfstOperand &opnd) const { + return (offsetType < opnd.offsetType || + (offsetType == opnd.offsetType && symbol < opnd.symbol) || + (offsetType == opnd.offsetType && symbol == opnd.symbol && GetValue() < opnd.GetValue())); + } + + void Emit(Emitter &emitter, const OpndProp *prop) const override; + + void Dump() const override { + if (IsImmOffset()) { + LogInfo::MapleLogger() << "ofst:" << GetValue(); + } else { + LogInfo::MapleLogger() << GetSymbolName(); + LogInfo::MapleLogger() << "+offset:" << GetValue(); + } + } + + bool IsBitmaskImmediate() const { + ASSERT(!IsZero(), "0 is reserved for bitmask immediate"); + ASSERT(!IsAllOnes(), "-1 is reserved for bitmask immediate"); + return maplebe::IsBitmaskImmediate(static_cast(value), static_cast(size)); + } + + bool IsSingleInstructionMovable() const override { + return (IsMoveWidableImmediate(static_cast(value), static_cast(size)) || + IsMoveWidableImmediate(~static_cast(value), static_cast(size)) || + IsBitmaskImmediate()); + } + + private: + OfstType offsetType; + const MIRSymbol *symbol; + int32 relocs; +}; + +/* representing for global variables address */ 
+class StImmOperand : public Operand { + public: + StImmOperand(const MIRSymbol &symbol, int64 offset, int32 relocs) + : Operand(kOpdStImmediate, 0), symbol(&symbol), offset(offset), relocs(relocs) {} + + ~StImmOperand() override = default; + + Operand *Clone(MemPool &memPool) const override { + return memPool.Clone(*this); + } + + const MIRSymbol *GetSymbol() const { + return symbol; + } + + const std::string &GetName() const { + return symbol->GetName(); + } + + int64 GetOffset() const { + return offset; + } + + int32 GetRelocs() const { + return relocs; + } + + bool operator==(const StImmOperand &opnd) const { + return (symbol == opnd.symbol && offset == opnd.offset && relocs == opnd.relocs); + } + + bool operator<(const StImmOperand &opnd) const { + return (symbol < opnd.symbol || (symbol == opnd.symbol && offset < opnd.offset) || + (symbol == opnd.symbol && offset == opnd.offset && relocs < opnd.relocs)); + } + + bool Less(const Operand &right) const override; + + void Emit(Emitter &emitter, const OpndProp *opndProp) const override; + + void Dump() const override { + LogInfo::MapleLogger() << GetName(); + LogInfo::MapleLogger() << "+offset:" << offset; + } + + private: + const MIRSymbol *symbol; + int64 offset; + int32 relocs; +}; + +class FunctionLabelOperand : public LabelOperand { + public: + explicit FunctionLabelOperand(const char *func) : LabelOperand(func, 0) {} + + ~FunctionLabelOperand() override = default; + + Operand *Clone(MemPool &memPool) const override { + return memPool.Clone(*this); + } + + void Emit(Emitter &emitter, const OpndProp *opndProp) const override { + (void)opndProp; + emitter.Emit(parentFunc); + } + + void Dump() const override { + LogInfo::MapleLogger() << "func :" << parentFunc; + } +}; + +/* Use StImmOperand instead? */ +class FuncNameOperand : public Operand { + public: + explicit FuncNameOperand(const MIRSymbol &fsym) : Operand(kOpdBBAddress, 0), symbol(&fsym) {} + + ~FuncNameOperand() override = default; + + Operand *Clone(MemPool &memPool) const override { + return memPool.Clone(*this); + } + + const std::string &GetName() const { + return symbol->GetName(); + } + + const MIRSymbol *GetFunctionSymbol() const { + return symbol; + } + + bool IsFuncNameOpnd() const override { + return true; + } + + void SetFunctionSymbol(const MIRSymbol &fsym) { + symbol = &fsym; + } + + void Emit(Emitter &emitter, const OpndProp *opndProp) const override { + (void)opndProp; + emitter.Emit(GetName()); + } + + bool Less(const Operand &right) const override { + if (&right == this) { + return false; + } + /* For different type. */ + if (GetKind() != right.GetKind()) { + return GetKind() < right.GetKind(); + } + + auto *rightOpnd = static_cast(&right); + + return static_cast(symbol) < static_cast(rightOpnd->symbol); + } + + void Dump() const override { + LogInfo::MapleLogger() << GetName(); + } + + private: + const MIRSymbol *symbol; +}; + +class AArch64CGFunc; + +/* + * Table C1-6 A64 Load/Store addressing modes + * | Offset + * Addressing Mode | Immediate | Register | Extended Register + * + * Base register only | [base{,#0}] | - | - + * (no offset) | B_OI_NONE | | + * imm=0 + * + * Base plus offset | [base{,#imm}] | [base,Xm{,LSL #imm}] | [base,Wm,(S|U)XTW {#imm}] + * B_OI_NONE | B_OR_X | B_OR_X + * imm=0,1 (0,3) | imm=00,01,10,11 (0/2,s/u) + * + * Pre-indexed | [base, #imm]! 
| - | - + * + * Post-indexed | [base], #imm | [base], Xm(a) | - + * + * Literal | label | - | - + * (PC-relative) + * + * a) The post-indexed by register offset mode can be used with the SIMD Load/Store + * structure instructions described in Load/Store Vector on page C3-154. Otherwise + * the post-indexed by register offset mode is not avacilable. + */ +class AArch64MemOperand : public MemOperand { + public: + enum AArch64AddressingMode : uint8 { + kAddrModeUndef, + /* AddrMode_BO, base, offset. EA = [base] + offset; */ + kAddrModeBOi, /* INTACT: EA = [base]+immediate */ + /* + * PRE: base += immediate, EA = [base] + * POST: EA = [base], base += immediate + */ + kAddrModeBOrX, /* EA = [base]+Extend([offreg/idxreg]), OR=Wn/Xn */ + kAddrModeLiteral, /* AArch64 insruction LDR takes literal and */ + /* + * "calculates an address from the PC value and an immediate offset, + * loads a word from memory, and writes it to a register." + */ + kAddrModeLo12Li // EA = [base] + #:lo12:Label+immediate. (Example: [x0, #:lo12:__Label300+456] + }; + /* + * ARMv8-A A64 ISA Overview by Matteo Franchin @ ARM + * (presented at 64-bit Android on ARM. Sep. 2015) p.14 + * o Address to load from/store to is a 64-bit base register + an optional offset + * LDR X0, [X1] ; Load from address held in X1 + * STR X0, [X1] ; Store to address held in X1 + * + * o Offset can be an immediate or a register + * LDR X0, [X1, #8] ; Load from address [X1 + 8 bytes] + * LDR X0, [X1, #-8] ; Load with negative offset + * LDR X0, [X1, X2] ; Load from address [X1 + X2] + * + * o A Wn register offset needs to be extended to 64 bits + * LDR X0, [X1, W2, SXTW] ; Sign-extend offset in W2 + * LDR X0, [X1, W2, UXTW] ; Zero-extend offset in W2 + * + * o Both Xn and Wn register offsets can include an optional left-shift + * LDR X0, [X1, W2, UXTW #2] ; Zero-extend offset in W2 & left-shift by 2 + * LDR X0, [X1, X2, LSL #2] ; Left-shift offset in X2 by 2 + * + * p.15 + * Addressing Modes Analogous C Code + * int *intptr = ... // X1 + * int out; // W0 + * o Simple: X1 is not changed + * LDR W0, [X1] out = *intptr; + * o Offset: X1 is not changed + * LDR W0, [X1, #4] out = intptr[1]; + * o Pre-indexed: X1 changed before load + * LDR W0, [X1, #4]! 
=|ADD X1,X1,#4 out = *(++intptr); + * |LDR W0,[X1] + * o Post-indexed: X1 changed after load + * LDR W0, [X1], #4 =|LDR W0,[X1] out = *(intptr++); + * |ADD X1,X1,#4 + */ + enum ExtendInfo : uint8 { + kShiftZero = 0x1, + kShiftOne = 0x2, + kShiftTwo = 0x4, + kShiftThree = 0x8, + kUnsignedExtend = 0x10, + kSignExtend = 0x20 + }; + + enum IndexingOption : uint8 { + kIntact, /* base register stays the same */ + kPreIndex, /* base register gets changed before load */ + kPostIndex, /* base register gets changed after load */ + }; + + AArch64MemOperand(AArch64reg reg, int32 offset, uint32 size, IndexingOption idxOpt = kIntact) + : MemOperand(size, + CG::GetCurCGFuncNoConst()->GetMemoryPool()->New(reg, k64BitSize, kRegTyInt), + nullptr, + CG::GetCurCGFuncNoConst()->GetMemoryPool()->New(offset, k32BitSize), nullptr), + addrMode(kAddrModeBOi), + extend(0), + idxOpt(idxOpt), + noExtend(false), + isStackMem(false) { + if (reg == RSP || reg == RFP) { + isStackMem = true; + } + } + + AArch64MemOperand(AArch64AddressingMode mode, uint32 size, RegOperand &base, RegOperand *index, + OfstOperand *offset, const MIRSymbol *symbol) + : MemOperand(size, &base, index, offset, symbol), + addrMode(mode), + extend(0), + idxOpt(kIntact), + noExtend(false), + isStackMem(false) { + if (base.GetRegisterNumber() == RSP || base.GetRegisterNumber() == RFP) { + isStackMem = true; + } + } + + AArch64MemOperand(AArch64AddressingMode mode, uint32 size, RegOperand &base, RegOperand &index, + OfstOperand *offset, const MIRSymbol &symbol, bool noExtend) + : MemOperand(size, &base, &index, offset, &symbol), + addrMode(mode), + extend(0), + idxOpt(kIntact), + noExtend(noExtend), + isStackMem(false) { + if (base.GetRegisterNumber() == RSP || base.GetRegisterNumber() == RFP) { + isStackMem = true; + } + } + + AArch64MemOperand(AArch64AddressingMode mode, uint32 dSize, RegOperand &baseOpnd, RegOperand &indexOpnd, + uint32 shift, bool isSigned = false) + : MemOperand(dSize, &baseOpnd, &indexOpnd, nullptr, nullptr), + addrMode(mode), + extend((isSigned ? 
kSignExtend : kUnsignedExtend) | (1U << shift)), + idxOpt(kIntact), + noExtend(false), + isStackMem(false) { + ASSERT(dSize == (k8BitSize << shift), "incompatible data size and shift amount"); + if (baseOpnd.GetRegisterNumber() == RSP || baseOpnd.GetRegisterNumber() == RFP) { + isStackMem = true; + } + } + + AArch64MemOperand(AArch64AddressingMode mode, uint32 dSize, const MIRSymbol &sym) + : MemOperand(dSize, nullptr, nullptr, nullptr, &sym), + addrMode(mode), + extend(0), + idxOpt(kIntact), + noExtend(false), + isStackMem(false) { + ASSERT(mode == kAddrModeLiteral, "This constructor version is supposed to be used with AddrMode_Literal only"); + } + + ~AArch64MemOperand() override = default; + + /* + Copy constructor + */ + explicit AArch64MemOperand(const AArch64MemOperand &memOpnd) + : MemOperand(memOpnd), addrMode(memOpnd.addrMode), extend(memOpnd.extend), idxOpt(memOpnd.idxOpt), + noExtend(memOpnd.noExtend), isStackMem(memOpnd.isStackMem) {} + AArch64MemOperand &operator=(const AArch64MemOperand &memOpnd) = default; + + Operand *Clone(MemPool &memPool) const override { + return memPool.Clone(*this); + } + + AArch64AddressingMode GetAddrMode() const { + return addrMode; + } + + const std::string &GetSymbolName() const { + return GetSymbol()->GetName(); + } + + void SetBaseRegister(AArch64RegOperand &baseRegOpnd) { + MemOperand::SetBaseRegister(baseRegOpnd); + } + + bool IsStackMem() const { + return isStackMem; + } + + void SetStackMem(bool isStack) { + isStackMem = isStack; + } + + RegOperand *GetOffsetRegister() const { + return MemOperand::GetIndexRegister(); + } + + Operand *GetOffset() const override; + + void SetOffsetRegister(AArch64RegOperand &osr) { + MemOperand::SetIndexRegister(osr); + } + + AArch64OfstOperand *GetOffsetImmediate() const { + return static_cast(GetOffsetOperand()); + } + + void SetOffsetImmediate(OfstOperand &ofstOpnd) { + MemOperand::SetOffsetOperand(ofstOpnd); + } + + /* Returns N where alignment == 2^N */ + static int32 GetImmediateOffsetAlignment(uint32 dSize) { + ASSERT(dSize >= k8BitSize, "error val:dSize"); + ASSERT(dSize <= k64BitSize, "error val:dSize"); + ASSERT((dSize & (dSize - 1)) == 0, "error val:dSize"); + /* dSize==8: 0, dSize==16 : 1, dSize==32: 2, dSize==64: 3 */ + return __builtin_ctz(dSize) - kBaseOffsetAlignment; + } + + static int32 GetMaxPIMM(uint32 dSize) { + ASSERT(dSize >= k8BitSize, "error val:dSize"); + ASSERT(dSize <= k64BitSize, "error val:dSize"); + ASSERT((dSize & (dSize - 1)) == 0, "error val:dSize"); + int32 alignment = GetImmediateOffsetAlignment(dSize); + /* alignment is between kAlignmentOf8Bit and kAlignmentOf64Bit */ + ASSERT(alignment >= kOffsetAlignmentOf8Bit, "error val:alignment"); + ASSERT(alignment <= kOffsetAlignmentOf64Bit, "error val:alignment"); + return (kMaxPimms[alignment]); + } + + bool IsOffsetMisaligned(uint32 dSize) const { + ASSERT(dSize >= k8BitSize, "error val:dSize"); + ASSERT(dSize <= k64BitSize, "error val:dSize"); + ASSERT((dSize & (dSize - 1)) == 0, "error val:dSize"); + if (dSize == k8BitSize || addrMode != kAddrModeBOi) { + return false; + } + AArch64OfstOperand *ofstOpnd = GetOffsetImmediate(); + return ((static_cast(ofstOpnd->GetOffsetValue()) & + static_cast((1U << static_cast(GetImmediateOffsetAlignment(dSize))) - 1)) != 0); + } + + static bool IsSIMMOffsetOutOfRange(int32 offset, bool is64bit, bool isLDSTPair) { + if (!isLDSTPair) { + return (offset < kLdStSimmLowerBound || offset > kLdStSimmUpperBound); + } + if (is64bit) { + return (offset < kLdpStp64SimmLowerBound || offset > 
kLdpStp64SimmUpperBound); + } + return (offset < kLdpStp32SimmLowerBound || offset > kLdpStp32SimmUpperBound); + } + + static bool IsPIMMOffsetOutOfRange(int32 offset, uint32 dSize) { + ASSERT(dSize >= k8BitSize, "error val:dSize"); + ASSERT(dSize <= k64BitSize, "error val:dSize"); + ASSERT((dSize & (dSize - 1)) == 0, "error val:dSize"); + return (offset < 0 || offset > GetMaxPIMM(dSize)); + } + + bool operator<(const AArch64MemOperand &opnd) const { + return addrMode < opnd.addrMode || + (addrMode == opnd.addrMode && GetBaseRegister() < opnd.GetBaseRegister()) || + (addrMode == opnd.addrMode && GetBaseRegister() == opnd.GetBaseRegister() && + GetIndexRegister() < opnd.GetIndexRegister()) || + (addrMode == opnd.addrMode && GetBaseRegister() == opnd.GetBaseRegister() && + GetIndexRegister() == opnd.GetIndexRegister() && GetOffsetOperand() < opnd.GetOffsetOperand()) || + (addrMode == opnd.addrMode && GetBaseRegister() == opnd.GetBaseRegister() && + GetIndexRegister() == opnd.GetIndexRegister() && GetOffsetOperand() == opnd.GetOffsetOperand() && + GetSymbol() < opnd.GetSymbol()) || + (addrMode == opnd.addrMode && GetBaseRegister() == opnd.GetBaseRegister() && + GetIndexRegister() == opnd.GetIndexRegister() && GetOffsetOperand() == opnd.GetOffsetOperand() && + GetSymbol() == opnd.GetSymbol() && GetSize() < opnd.GetSize()); + } + + bool Less(const Operand &right) const override; + + bool NoAlias(AArch64MemOperand &rightOpnd) const; + + VaryType GetMemVaryType() override { + Operand *ofstOpnd = GetOffsetOperand(); + if (ofstOpnd != nullptr) { + auto *opnd = static_cast(ofstOpnd); + return opnd->GetVary(); + } + return kNotVary; + } + + bool IsExtendedRegisterMode() const { + return addrMode == kAddrModeBOrX; + } + + bool SignedExtend() const { + return IsExtendedRegisterMode() && ((extend & kSignExtend) != 0); + } + + bool UnsignedExtend() const { + return IsExtendedRegisterMode() && !SignedExtend(); + } + + int32 ShiftAmount() const { + int32 scale = extend & 0xF; + ASSERT(IsExtendedRegisterMode(), "Just checking"); + /* 8 is 1 << 3, 4 is 1 << 2, 2 is 1 << 1, 1 is 1 << 0; */ + return (scale == 8) ? 3 : ((scale == 4) ? 2 : ((scale == 2) ? 1 : 0)); + } + + bool ShouldEmitExtend() const { + return !noExtend && ((extend & 0x3F) != 0); + } + + bool IsIntactIndexed() const { + return idxOpt == kIntact; + } + + bool IsPostIndexed() const { + return idxOpt == kPostIndex; + } + + bool IsPreIndexed() const { + return idxOpt == kPreIndex; + } + + std::string GetExtendAsString() const { + if (GetOffsetRegister()->GetSize() == k64BitSize) { + return std::string("LSL"); + } + return ((extend & kSignExtend) != 0) ? std::string("SXTW") : std::string("UXTW"); + } + + void Emit(Emitter &emitter, const OpndProp *opndProp) const override; + + void Dump() const override; + + /* Return true if given operand has the same base reg and offset with this. 
*/ + bool Equals(Operand &operand) const override; + bool Equals(AArch64MemOperand &opnd) const; + + private: + static constexpr int32 kLdStSimmLowerBound = -256; + static constexpr int32 kLdStSimmUpperBound = 255; + + static constexpr int32 kLdpStp32SimmLowerBound = -256; /* multiple of 4 */ + static constexpr int32 kLdpStp32SimmUpperBound = 252; + + static constexpr int32 kLdpStp64SimmLowerBound = -512; /* multiple of 8 */ + static constexpr int32 kLdpStp64SimmUpperBound = 504; + + static constexpr int32 kMaxPimm8 = 4095; + static constexpr int32 kMaxPimm16 = 8190; + static constexpr int32 kMaxPimm32 = 16380; + static constexpr int32 kMaxPimm64 = 32760; + + static const int32 kMaxPimms[4]; + + AArch64AddressingMode addrMode; + + uint32 extend; /* used with offset register ; AddrMode_B_OR_X */ + + IndexingOption idxOpt; /* used with offset immediate ; AddrMode_B_OI */ + + bool noExtend; + + bool isStackMem; +}; + +class AArch64ListOperand : public ListOperand { + public: + explicit AArch64ListOperand(MapleAllocator &allocator) : ListOperand(allocator) {} + + ~AArch64ListOperand() override = default; + + Operand *Clone(MemPool &memPool) const override { + return memPool.Clone(*this); + } + + void Emit(Emitter &emitter, const OpndProp *opndProp) const override; +}; + +class CondOperand : public Operand { + public: + explicit CondOperand(AArch64CC_t cc) : Operand(Operand::kOpdCond, k4ByteSize), cc(cc) {} + + ~CondOperand() override = default; + + Operand *Clone(MemPool &memPool) const override { + return memPool.New(cc); + } + + AArch64CC_t GetCode() const { + return cc; + } + + void Emit(Emitter &emitter, const OpndProp *opndProp) const override { + (void)opndProp; + emitter.Emit(ccStrs[cc]); + } + + bool Less(const Operand &right) const override; + + void Dump() const override { + LogInfo::MapleLogger() << "CC: " << ccStrs[cc]; + } + + private: + static const char *ccStrs[kCcLast]; + AArch64CC_t cc; +}; + +/* used with MOVK */ +class LogicalShiftLeftOperand : public Operand { + public: + /* + * Do not make the constructor public unless you are sure you know what you are doing. + * Only AArch64CGFunc is supposed to create LogicalShiftLeftOperand objects + * as part of initialization + */ + LogicalShiftLeftOperand(uint32 amt, int32 bitLen) + : Operand(Operand::kOpdShift, bitLen), shiftAmount(amt) {} /* bitlength is equal to 4 or 6 */ + + ~LogicalShiftLeftOperand() override = default; + + Operand *Clone(MemPool &memPool) const override { + return memPool.Clone(*this); + } + + void Emit(Emitter &emitter, const OpndProp *opndProp) const override { + (void)opndProp; + emitter.Emit(" LSL #").Emit(shiftAmount); + } + + bool Less(const Operand &right) const override { + if (&right == this) { + return false; + } + + /* For different type. */ + if (GetKind() != right.GetKind()) { + return GetKind() < right.GetKind(); + } + + auto *rightOpnd = static_cast(&right); + + /* The same type. 
*/ + return shiftAmount < rightOpnd->shiftAmount; + } + + uint32 GetShiftAmount() const { + return shiftAmount; + } + + void Dump() const override { + LogInfo::MapleLogger() << "LSL: " << shiftAmount; + } + + private: + uint32 shiftAmount; +}; + +class ExtendShiftOperand : public Operand { + public: + enum ExtendOp : uint8 { + kSXTW, + }; + + ExtendShiftOperand(ExtendOp op, uint32 amt, int32 bitLen) + : Operand(Operand::kOpdExtend, bitLen), extendOp(op), shiftAmount(amt) {} + + ~ExtendShiftOperand() override = default; + + Operand *Clone(MemPool &memPool) const override { + return memPool.Clone(*this); + } + + void Emit(Emitter &emitter, const OpndProp *prop) const override { + (void)prop; + switch (extendOp) { + case kSXTW: + emitter.Emit("SXTW #").Emit(shiftAmount); + break; + default: + ASSERT(false, "should not be here"); + break; + } + } + + bool Less(const Operand &right) const override; + + void Dump() const override { + switch (extendOp) { + case kSXTW: + LogInfo::MapleLogger() << "SXTW: "; + break; + default: + ASSERT(false, "should not be here"); + break; + } + LogInfo::MapleLogger() << shiftAmount; + } + + private: + ExtendOp extendOp; + uint32 shiftAmount; +}; + +class BitShiftOperand : public Operand { + public: + enum ShiftOp : uint8 { + kLSL, /* logical shift left */ + kLSR, /* logical shift right */ + kASR, /* arithmetic shift right */ + }; + + BitShiftOperand(ShiftOp op, uint32 amt, int32 bitLen) + : Operand(Operand::kOpdShift, bitLen), shiftOp(op), shiftAmount(amt) {} /* bitlength is equal to 5 or 6 */ + + ~BitShiftOperand() override = default; + + Operand *Clone(MemPool &memPool) const override { + return memPool.Clone(*this); + } + + void Emit(Emitter &emitter, const OpndProp *prop) const override { + (void)prop; + emitter.Emit((shiftOp == kLSL) ? "LSL #" : ((shiftOp == kLSR) ? "LSR #" : "ASR #")).Emit(shiftAmount); + } + + bool Less(const Operand &right) const override; + + uint32 GetShiftAmount() { + return shiftAmount; + } + + void Dump() const override { + LogInfo::MapleLogger() << ((shiftOp == kLSL) ? "LSL: " : ((shiftOp == kLSR) ? "LSR: " : "ASR: ")); + LogInfo::MapleLogger() << shiftAmount; + } + + private: + ShiftOp shiftOp; + uint32 shiftAmount; +}; + +class CommentOperand : public Operand { + public: + CommentOperand(const char *str, MemPool &memPool) + : Operand(Operand::kOpdString, 0), comment(str, &memPool) {} + + CommentOperand(const std::string &str, MemPool &memPool) + : Operand(Operand::kOpdString, 0), comment(str, &memPool) {} + + ~CommentOperand() override = default; + + const MapleString &GetComment() const { + return comment; + } + + Operand *Clone(MemPool &memPool) const override { + return memPool.Clone(*this); + } + + bool IsCommentOpnd() const override { + return true; + } + + void Emit(Emitter &emitter, const OpndProp *opndProp) const override { + (void)opndProp; + emitter.Emit(comment); + } + + bool Less(const Operand &right) const override { + /* For different type. 
*/ + return GetKind() < right.GetKind(); + } + + void Dump() const override { + LogInfo::MapleLogger() << "# " << comment << std::endl; + } + + private: + const MapleString comment; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_OPERAND_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_optimize_common.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_optimize_common.h new file mode 100644 index 0000000000000000000000000000000000000000..d3bf641745ca2f0f780f958d64018d1b4e784002 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_optimize_common.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_OPTIMIZE_COMMON_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_OPTIMIZE_COMMON_H + +#include "riscv64_isa.h" +#include "optimize_common.h" + +namespace maplebe { +using namespace maple; + + +class AArch64InsnVisitor : public InsnVisitor { + public: + explicit AArch64InsnVisitor(CGFunc &func) : InsnVisitor(func) {} + + ~AArch64InsnVisitor() override = default; + + void ModifyJumpTarget(maple::LabelIdx targetLabel, BB &bb) override; + void ModifyJumpTarget(Operand &targetOperand, BB &bb) override; + void ModifyJumpTarget(BB &newTarget, BB &bb) override; + /* Check if it requires to add extra gotos when relocate bb */ + MOperator FlipConditionOp(MOperator flippedOp, int &targetIdx) override; + Insn *CloneInsn(Insn &originalInsn) override; + LabelIdx GetJumpLabel(const Insn &insn) const override; + bool IsCompareInsn(const Insn &insn) const override; + bool IsCompareAndBranchInsn(const Insn &insn) const override; + RegOperand *CreateVregFromReg(const RegOperand &pReg) override; + + private: + int GetJumpTargetIdx(const Insn &insn) const; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_OPTIMIZE_COMMON_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_peep.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_peep.h new file mode 100644 index 0000000000000000000000000000000000000000..b652136e77f301ecc5b36ef9f5fbc583b31e31ee --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_peep.h @@ -0,0 +1,699 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_PEEP_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_PEEP_H + +#include +#include "peep.h" +#include "riscv64_cg.h" +#include "optimize_common.h" +#include "mir_builder.h" + +namespace maplebe { +/* + * Looking for identical mem insn to eliminate. + * If two back-to-back is: + * 1. str + str + * 2. str + ldr + * And the [MEM] is pattern of [base + offset] + * 1. The [MEM] operand is exactly same then first + * str can be eliminate. + * 2. The [MEM] operand is exactly same and src opnd + * of str is same as the dest opnd of ldr then + * ldr can be eliminate + */ +class RemoveIdenticalLoadAndStoreAArch64 : public PeepPattern { + public: + explicit RemoveIdenticalLoadAndStoreAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~RemoveIdenticalLoadAndStoreAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + + private: + bool IsMemOperandsIdentical(const Insn &insn1, const Insn &insn2) const; +}; + +/* Remove redundant mov which src and dest opnd is exactly same */ +class RemoveMovingtoSameRegAArch64 : public PeepPattern { + public: + explicit RemoveMovingtoSameRegAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~RemoveMovingtoSameRegAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* Combining 2 STRs into 1 stp or 2 LDRs into 1 ldp, when they are + * back to back and the [MEM] they access is conjointed. + */ +class CombineContiLoadAndStoreAArch64 : public PeepPattern { + public: + explicit CombineContiLoadAndStoreAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~CombineContiLoadAndStoreAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* Eliminate the sxt[b|h|w] w0, w0;, when w0 is satisify following: + * i) mov w0, #imm (#imm is not out of range) + * ii) ldrs[b|h] w0, [MEM] + */ +class EliminateSpecifcSXTAArch64 : public PeepPattern { + public: + explicit EliminateSpecifcSXTAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~EliminateSpecifcSXTAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* Eliminate the uxt[b|h|w] w0, w0;when w0 is satisify following: + * i) mov w0, #imm (#imm is not out of range) + * ii) mov w0, R0(Is return value of call and return size is not of range) + * iii)w0 is defined and used by special load insn and uxt[] pattern + */ +class EliminateSpecifcUXTAArch64 : public PeepPattern { + public: + explicit EliminateSpecifcUXTAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~EliminateSpecifcUXTAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* fmov ireg1 <- freg1 previous insn + * fmov ireg2 <- freg1 current insn + * use ireg2 may or may not be present + * => + * fmov ireg1 <- freg1 previous insn + * mov ireg2 <- ireg1 current insn + * use ireg1 may or may not be present + */ +class FmovRegAArch64 : public PeepPattern { + public: + explicit FmovRegAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~FmovRegAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* cbnz x0, labelA + * mov x0, 0 + * b return-bb + * labelA: + * => + * cbz x0, return-bb + * labelA: + */ +class CbnzToCbzAArch64 : public PeepPattern { + public: + explicit CbnzToCbzAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~CbnzToCbzAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* When exist load after load or load after store, and [MEM] is + * totally same. Then optimize them. 
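The same-[MEM] patterns described above (RemoveIdenticalLoadAndStoreAArch64 and the load-after-load/load-after-store case) both hinge on comparing two back-to-back memory operands of the [base + offset] form. A standalone sketch of that check, with simplified hypothetical types rather than Maple's Insn/MemOperand API:

#include <cassert>
#include <cstdint>

struct MemRef {                 // stands in for a [base + offset] memory operand
  int baseReg;
  int64_t offset;
  bool operator==(const MemRef &o) const {
    return baseReg == o.baseReg && offset == o.offset;
  }
};

enum class Op { kStr, kLdr };

struct SimpleInsn {
  Op op;
  int reg;                      // source register for str, destination for ldr
  MemRef mem;
};

// str r?,[m]; str r?,[m] : the first store is dead and can be removed.
// str rX,[m]; ldr rX,[m] : the load re-reads what was just stored and can be removed.
bool HasRedundantAccess(const SimpleInsn &first, const SimpleInsn &second) {
  if (!(first.mem == second.mem)) {
    return false;
  }
  if (first.op == Op::kStr && second.op == Op::kStr) {
    return true;
  }
  return first.op == Op::kStr && second.op == Op::kLdr && first.reg == second.reg;
}

int main() {
  SimpleInsn st{Op::kStr, 1, {29, 16}};       // str x1, [x29, #16]
  SimpleInsn ld{Op::kLdr, 1, {29, 16}};       // ldr x1, [x29, #16]
  SimpleInsn ldOther{Op::kLdr, 2, {29, 24}};  // different offset: not redundant
  assert(HasRedundantAccess(st, ld));
  assert(!HasRedundantAccess(st, ldOther));
  return 0;
}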
+ */ +class ContiLDRorSTRToSameMEMAArch64 : public PeepPattern { + public: + explicit ContiLDRorSTRToSameMEMAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ContiLDRorSTRToSameMEMAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * Remove following patterns: + * mov x1, x0 + * bl MCC_IncDecRef_NaiveRCFast + */ +class RemoveIncDecRefAArch64 : public PeepPattern { + public: + explicit RemoveIncDecRefAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~RemoveIncDecRefAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * When GCONLY is enabled, the read barriers can be inlined. + * we optimize it with the following pattern: + * #if USE_32BIT_REF + * bl MCC_LoadRefField -> ldr w0, [x1] + * bl MCC_LoadVolatileField -> ldar w0, [x1] + * bl MCC_LoadRefStatic -> ldr w0, [x0] + * bl MCC_LoadVolatileStaticField -> ldar w0, [x0] + * bl MCC_Dummy -> omitted + * #else + * bl MCC_LoadRefField -> ldr x0, [x1] + * bl MCC_LoadVolatileField -> ldar x0, [x1] + * bl MCC_LoadRefStatic -> ldr x0, [x0] + * bl MCC_LoadVolatileStaticField -> ldar x0, [x0] + * bl MCC_Dummy -> omitted + * #endif + * + * if we encounter a tail call optimized read barrier call, + * such as: + * b MCC_LoadRefField + * a return instruction will be added just after the load: + * ldr w0, [x1] + * ret + */ +class InlineReadBarriersAArch64 : public PeepPattern { + public: + explicit InlineReadBarriersAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~InlineReadBarriersAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * mov w1, #34464 + * movk w1, #1, LSL #16 + * sdiv w2, w0, w1 + * ========> + * mov w1, #34464 // may deleted if w1 not live anymore. + * movk w1, #1, LSL #16 // may deleted if w1 not live anymore. 
+ * mov w16, #0x588f + * movk w16, #0x4f8b, LSL #16 + * smull x16, w0, w16 + * asr x16, x16, #32 + * add x16, x16, w0, SXTW + * asr x16, x16, #17 + * add x2, x16, x0, LSR #31 + */ +class ReplaceDivToMultiAArch64 : public PeepPattern { + public: + explicit ReplaceDivToMultiAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ReplaceDivToMultiAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * Optimize the following patterns: + * and w0, w0, #1 ====> and w0, w0, #1 + * cmp w0, #1 + * cset w0, EQ + * + * and w0, w0, #1 ====> and w0, w0, #1 + * cmp w0, #0 + * cset w0, NE + * --------------------------------------------------- + * and w0, w0, #imm ====> ubfx w0, w0, pos, size + * cmp w0, #imm + * cset w0, EQ + * + * and w0, w0, #imm ====> ubfx w0, w0, pos, size + * cmp w0, #0 + * cset w0, NE + * conditions: + * imm is pos power of 2 + */ +class AndCmpBranchesToCsetAArch64 : public PeepPattern { + public: + explicit AndCmpBranchesToCsetAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~AndCmpBranchesToCsetAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * We optimize the following pattern in this function: + * cmp w[0-9]*, wzr ====> tbz w[0-9]*, #31, .label + * bge .label + * + * cmp wzr, w[0-9]* ====> tbz w[0-9]*, #31, .label + * ble .label + * + * cmp w[0-9]*,wzr ====> tbnz w[0-9]*, #31, .label + * blt .label + * + * cmp wzr, w[0-9]* ====> tbnz w[0-9]*, #31, .label + * bgt .label + * + * cmp w[0-9]*, #0 ====> tbz w[0-9]*, #31, .label + * bge .label + * + * cmp w[0-9]*, #0 ====> tbnz w[0-9]*, #31, .label + * blt .label + */ +class ZeroCmpBranchesAArch64 : public PeepPattern { + public: + explicit ZeroCmpBranchesAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ZeroCmpBranchesAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * cmp w0, #0 + * cset w1, NE --> mov w1, w0 + * + * cmp w0, #0 + * cset w1, EQ --> eor w1, w0, 1 + * + * cmp w0, #1 + * cset w1, NE --> eor w1, w0, 1 + * + * cmp w0, #1 + * cset w1, EQ --> mov w1, w0 + * + * cmp w0, #0 + * cset w0, NE -->null + * + * cmp w0, #1 + * cset w0, EQ -->null + * + * condition: + * 1. the first operand of cmp instruction must has only one valid bit + * 2. the second operand of cmp instruction must be 0 or 1 + * 3. flag register of cmp isntruction must not be used later + */ +class CmpCsetAArch64 : public PeepPattern { + public: + explicit CmpCsetAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~CmpCsetAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + + private: + bool CheckOpndDefPoints(Insn &checkInsn, int opndIdx); + const Insn *DefInsnOfOperandInBB(const Insn &startInsn, const Insn &checkInsn, int opndIdx); + bool OpndDefByOneValidBit(const Insn &defInsn); + bool FlagUsedLaterInCurBB(const BB &bb, Insn &startInsn) const; +}; + +/* + * add x0, x1, x0 + * ldr x2, [x0] + * ==> + * ldr x2, [x1, x0] + */ +class ComplexMemOperandAddAArch64 : public PeepPattern { + public: + explicit ComplexMemOperandAddAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ComplexMemOperandAddAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + private: + + bool IsExpandBaseOpnd(const Insn &insn, Insn &prevInsn); +}; + +/* + * cbnz w0, @label + * .... + * mov w0, #0 (elseBB) -->this instruction can be deleted + * + * cbz w0, @label + * .... 
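The ReplaceDivToMultiAArch64 rewrite shown above replaces a signed division by 100000 (the divisor built by mov/movk) with a multiply by the magic constant 0x4F8B588F, arithmetic shifts, and a sign-bit correction. A standalone C++ check of that arithmetic, mirroring the smull/asr/add sequence with the constants taken from the comment above (this is an illustration, not Maple's implementation):

#include <cassert>
#include <cstdint>

// Mirrors: smull x16, w0, w16; asr x16, x16, #32; add x16, x16, w0, SXTW;
//          asr x16, x16, #17;  add x2, x16, x0, LSR #31.
// Right-shifting a negative value is arithmetic on mainstream compilers
// (guaranteed since C++20), matching asr.
int32_t DivBy100000(int32_t n) {
  int64_t prod = static_cast<int64_t>(n) * 0x4F8B588F;
  int64_t q = (prod >> 32) + n;   // high 32 bits of the product, plus n
  q >>= 17;
  return static_cast<int32_t>(q) + static_cast<int32_t>(static_cast<uint32_t>(n) >> 31);
}

int main() {
  const int32_t samples[] = {0, 1, 99999, 100000, 123456789, -1, -99999,
                             -100000, -123456789, INT32_MAX, INT32_MIN};
  for (int32_t n : samples) {
    assert(DivBy100000(n) == n / 100000);   // truncating division, as in C
  }
  return 0;
}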
+ * mov w0, #0 (ifBB) -->this instruction can be deleted + * + * condition: + * 1.there is not predefine points of w0 in elseBB(ifBB) + * 2.the first opearnd of cbnz insn is same as the first Operand of mov insn + * 3.w0 is defined by move 0 + * 4.all preds of elseBB(ifBB) end with cbnz or cbz + * + * NOTE: if there are multiple preds and there is not define point of w0 in one pred, + * (mov w0, 0) can't be deleted, avoiding use before def. + */ +class DeleteMovAfterCbzOrCbnzAArch64 : public PeepPattern { + public: + explicit DeleteMovAfterCbzOrCbnzAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) { + cgcfg = cgFunc.GetTheCFG(); + cgcfg->InitInsnVisitor(cgFunc); + } + ~DeleteMovAfterCbzOrCbnzAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + + private: + bool PredBBCheck(BB &bb, bool checkCbz, const Operand &opnd) const; + bool OpndDefByMovZero(const Insn &insn) const; + bool NoPreDefine(Insn &testInsn) const; + void ProcessBBHandle(BB *processBB, const BB &bb, const Insn &insn); + CGCFG *cgcfg; +}; + +/* + * We optimize the following pattern in this function: + * if w0's valid bits is one + * uxtb w0, w0 + * eor w0, w0, #1 + * cbz w0, .label + * => + * tbnz w0, .label + * && + * if there exists uxtb w0, w0 and w0's valid bits is + * less than 8, eliminate it. + */ +class OneHoleBranchesPreAArch64 : public PeepPattern { + public: + explicit OneHoleBranchesPreAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~OneHoleBranchesPreAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + private: + MOperator FindNewMop(const BB &bb, const Insn &insn) const; +}; + +/* + * We optimize the following pattern in this function: + * movz x0, #11544, LSL #0 + * movk x0, #21572, LSL #16 + * movk x0, #8699, LSL #32 + * movk x0, #16393, LSL #48 + * => + * ldr x0, label_of_constant_1 + */ +class LoadFloatPointAArch64 : public PeepPattern { + public: + explicit LoadFloatPointAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~LoadFloatPointAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + private: + bool FindLoadFloatPoint(std::vector &optInsn, Insn &insn); + bool IsPatternMatch(const std::vector &optInsn); +}; + +/* + * Optimize the following patterns: + * orr w21, w0, #0 ====> mov w21, w0 + * orr w21, #0, w0 ====> mov w21, w0 + */ +class ReplaceOrrToMovAArch64 : public PeepPattern { + public: + explicit ReplaceOrrToMovAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ReplaceOrrToMovAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * Optimize the following patterns: + * ldr w0, [x21,#68] ldr w0, [x21,#68] + * mov w1, #-1 mov w1, #-1 + * cmp w0, w1 ====> cmn w0, #-1 + */ +class ReplaceCmpToCmnAArch64 : public PeepPattern { + public: + explicit ReplaceCmpToCmnAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ReplaceCmpToCmnAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * Remove following patterns: + * mov x0, XX + * mov x1, XX + * bl MCC_IncDecRef_NaiveRCFast + */ +class RemoveIncRefAArch64 : public PeepPattern { + public: + explicit RemoveIncRefAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~RemoveIncRefAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * opt long int compare with 0 + * *cmp x0, #0 + * csinv w0, wzr, wzr, GE + * csinc w0, w0, wzr, LE + * cmp w0, #0 + * => + * cmp x0, #0 + */ +class LongIntCompareWithZAArch64 : public PeepPattern { + public: + explicit LongIntCompareWithZAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + 
~LongIntCompareWithZAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + + private: + bool FindLondIntCmpWithZ(std::vector &optInsn, Insn &insn); + bool IsPatternMatch(const std::vector &optInsn); +}; + +/* + * add x0, x1, #:lo12:Ljava_2Futil_2FLocale_241_3B_7C_24SwitchMap_24java_24util_24Locale_24Category + * ldr x2, [x0] + * ==> + * ldr x2, [x1, #:lo12:Ljava_2Futil_2FLocale_241_3B_7C_24SwitchMap_24java_24util_24Locale_24Category] + */ +class ComplexMemOperandAArch64 : public PeepPattern { + public: + explicit ComplexMemOperandAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ComplexMemOperandAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * add x0, x1, x0 + * ldr x2, [x0] + * ==> + * ldr x2, [x1, x0] + */ +class ComplexMemOperandPreAddAArch64 : public PeepPattern { + public: + explicit ComplexMemOperandPreAddAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ComplexMemOperandPreAddAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * add x0, x0, x1, LSL #2 + * ldr x2, [x0] + * ==> + * ldr x2, [x0,x1,LSL #2] + */ +class ComplexMemOperandLSLAArch64 : public PeepPattern { + public: + explicit ComplexMemOperandLSLAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ComplexMemOperandLSLAArch64() override = default; + bool CheckShiftValid(const AArch64MemOperand &memOpnd, BitShiftOperand &lsl) const; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * ldr x0, label_of_constant_1 + * fmov d4, x0 + * ==> + * ldr d4, label_of_constant_1 + */ +class ComplexMemOperandLabelAArch64 : public PeepPattern { + public: + explicit ComplexMemOperandLabelAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ComplexMemOperandLabelAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * mov R0, vreg1 / R0 mov R0, vreg1 + * add vreg2, vreg1, #imm1 add vreg2, vreg1, #imm1 + * mov R1, vreg2 mov R1, vreg2 + * mov R2, vreg3 mov R2, vreg3 + * ... ... 
+ * mov R0, vreg1 + * add vreg4, vreg1, #imm2 -> str vreg5, [vreg1, #imm2] + * mov R1, vreg4 + * mov R2, vreg5 + */ +class WriteFieldCallAArch64 : public PeepPattern { + public: + struct WriteRefFieldParam { + Operand *objOpnd = nullptr; + RegOperand *fieldBaseOpnd = nullptr; + int64 fieldOffset = 0; + Operand *fieldValue = nullptr; + }; + explicit WriteFieldCallAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~WriteFieldCallAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + void Reset() { + hasWriteFieldCall = false; + prevCallInsn = nullptr; + } + + private: + bool hasWriteFieldCall = false; + Insn *prevCallInsn = nullptr; + WriteRefFieldParam firstCallParam; + bool WriteFieldCallOptPatternMatch(const Insn &writeFieldCallInsn, WriteRefFieldParam ¶m, + std::vector ¶mDefInsns); + bool IsWriteRefFieldCallInsn(const Insn &insn); +}; + +/* + * Remove following patterns: + * mov x0, xzr/#0 + * bl MCC_DecRef_NaiveRCFast + */ +class RemoveDecRefAArch64 : public PeepPattern { + public: + explicit RemoveDecRefAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~RemoveDecRefAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * We optimize the following pattern in this function: + * add x1, x1, #16 + * add w2, w10, w10 + * add w2, w2, #1 + * sxtw x2, w2 + * add x1, x1, x2, LSL #3 + * => + * add x1, x1, w10, SXTW #(3+1) combine origin insn 2 (self-added operation) + * add x1, x1, #24 + */ +class ComputationTreeAArch64 : public PeepPattern { + public: + explicit ComputationTreeAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ComputationTreeAArch64() override = default; + void Run(BB &bb, Insn &insn) override; + + private: + bool IsPatternMatch(const std::vector &optInsn) const; + bool FindComputationTree(std::vector &optInsn, Insn &insn); +}; + +/* + * We optimize the following pattern in this function: + * and x1, x1, #imm (is n power of 2) + * cbz/cbnz x1, .label + * => + * and x1, x1, #imm (is n power of 2) + * tbnz/tbz x1, #n, .label + */ +class OneHoleBranchesAArch64 : public PeepPattern { + public: + explicit OneHoleBranchesAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~OneHoleBranchesAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * Replace following pattern: + * mov x1, xzr + * bl MCC_IncDecRef_NaiveRCFast + * => + * bl MCC_IncRef_NaiveRCFast + */ +class ReplaceIncDecWithIncAArch64 : public PeepPattern { + public: + explicit ReplaceIncDecWithIncAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~ReplaceIncDecWithIncAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +/* + * Optimize the following patterns: + * and w0, w6, #1 ====> tbz w6, 0, .label + * cmp w0, #1 + * bne .label + * + * and w0, w6, #16 ====> tbz w6, 4, .label + * cmp w0, #16 + * bne .label + * + * and w0, w6, #32 ====> tbnz w6, 5, .label + * cmp w0, #32 + * beq .label + * + * and x0, x6, #32 ====> tbz x6, 5, .label + * cmp x0, #0 + * beq .label + * + * and x0, x6, #32 ====> tbnz x6, 5, .label + * cmp x0, #0 + * bne .label + */ +class AndCmpBranchesToTbzAArch64 : public PeepPattern { + public: + explicit AndCmpBranchesToTbzAArch64(CGFunc &cgFunc) : PeepPattern(cgFunc) {} + ~AndCmpBranchesToTbzAArch64() override = default; + void Run(BB &bb, Insn &insn) override; +}; + +class AArch64PeepHole : public PeepPatternMatch { + public: + AArch64PeepHole(CGFunc &oneCGFunc, MemPool *memPool) : PeepPatternMatch(oneCGFunc, memPool) {} + ~AArch64PeepHole() override = default; + void InitOpts() override; + void Run(BB &bb, 
Insn &insn) override; + + private: + enum PeepholeOpts : int32 { + kRemoveIdenticalLoadAndStoreOpt = 0, + kRemoveMovingtoSameRegOpt, + kCombineContiLoadAndStoreOpt, + kEliminateSpecifcSXTOpt, + kEliminateSpecifcUXTOpt, + kFmovRegOpt, + kCbnzToCbzOpt, + kContiLDRorSTRToSameMEMOpt, + kRemoveIncDecRefOpt, + kInlineReadBarriersOpt, + kReplaceDivToMultiOpt, + kAndCmpBranchesToCsetOpt, + kZeroCmpBranchesOpt, + kPeepholeOptsNum + }; +}; + +class AArch64PeepHole0 : public PeepPatternMatch { + public: + AArch64PeepHole0(CGFunc &oneCGFunc, MemPool *memPool) : PeepPatternMatch(oneCGFunc, memPool) {} + ~AArch64PeepHole0() override = default; + void InitOpts() override; + void Run(BB &bb, Insn &insn) override; + + private: + enum PeepholeOpts : int32 { + kRemoveIdenticalLoadAndStoreOpt = 0, + kCmpCsetOpt, + kComplexMemOperandOptAdd, + kDeleteMovAfterCbzOrCbnzOpt, + kPeepholeOptsNum + }; +}; + +class AArch64PrePeepHole : public PeepPatternMatch { + public: + AArch64PrePeepHole(CGFunc &oneCGFunc, MemPool *memPool) : PeepPatternMatch(oneCGFunc, memPool) {} + ~AArch64PrePeepHole() override = default; + void InitOpts() override; + void Run(BB &bb, Insn &insn) override; + + private: + enum PeepholeOpts : int32 { + kOneHoleBranchesPreOpt = 0, + kLoadFloatPointOpt, + kReplaceOrrToMovOpt, + kReplaceCmpToCmnOpt, + kRemoveIncRefOpt, + kLongIntCompareWithZOpt, + kComplexMemOperandOpt, + kComplexMemOperandPreOptAdd, + kComplexMemOperandOptLSL, + kComplexMemOperandOptLabel, + kWriteFieldCallOpt, + kPeepholeOptsNum + }; +}; + +class AArch64PrePeepHole1 : public PeepPatternMatch { + public: + AArch64PrePeepHole1(CGFunc &oneCGFunc, MemPool *memPool) : PeepPatternMatch(oneCGFunc, memPool) {} + ~AArch64PrePeepHole1() override = default; + void InitOpts() override; + void Run(BB &bb, Insn &insn) override; + + private: + enum PeepholeOpts : int32 { + kRemoveDecRefOpt = 0, + kComputationTreeOpt, + kOneHoleBranchesOpt, + kReplaceIncDecWithIncOpt, + kAndCmpBranchesToTbzOpt, + kPeepholeOptsNum + }; +}; +} /* namespace maplebe */ +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_PEEP_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_proepilog.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_proepilog.h new file mode 100644 index 0000000000000000000000000000000000000000..c0115cffcbe29d4737600139acbb0afb1e20adf5 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_proepilog.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_PROEPILOG_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_PROEPILOG_H + +#include "proepilog.h" +#include "cg.h" +#include "operand.h" +#include "riscv64_cgfunc.h" +#include "riscv64_operand.h" +#include "riscv64_insn.h" + +namespace maplebe { +using namespace maple; + +class AArch64GenProEpilog : public GenProEpilog { + public: + explicit AArch64GenProEpilog(CGFunc &func) : GenProEpilog(func) {} + ~AArch64GenProEpilog() override = default; + + bool TailCallOpt() override; + void Run() override; + private: + void GenStackGuard(BB&); + BB &GenStackGuardCheckInsn(BB&); + bool HasLoop(); + bool OptimizeTailBB(BB &bb, std::set &callInsns); + void TailCallBBOpt(const BB &exitBB, std::set &callInsns); + void ForwardPropagateAndRename(Insn &mv, Insn &ld, const BB &terminateBB); + void ReplaceMachedOperand(Insn &orig, Insn &target, const RegOperand &match, bool replaceOrigSrc); + bool BackwardFindDependency(BB &ifbb, RegOperand &tgtOpnd, Insn *&ld, Insn *&mov, + Insn *&depMov, std::list &list); + BB *IsolateFastPath(BB&); + AArch64MemOperand *SplitStpLdpOffsetForCalleeSavedWithAddInstruction(const AArch64MemOperand &mo, uint32 bitLen, + AArch64reg baseReg = AArch64reg::kRinvalid); + void AppendInstructionPushPair(AArch64reg reg0, AArch64reg reg1, RegType rty, int offset); + void AppendInstructionPushSingle(AArch64reg reg, RegType rty, int offset); + void AppendInstructionAllocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty); + void AppendInstructionAllocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty); + void GeneratePushRegs(); + void GeneratePushUnnamedVarargRegs(); + void AppendInstructionStackCheck(AArch64reg reg, RegType rty, int offset); + void GenerateProlog(BB&); + + void GenerateRet(BB &bb); + bool TestPredsOfRetBB(const BB &exitBB); + void AppendInstructionPopSingle(AArch64reg reg, RegType rty, int offset); + void AppendInstructionPopPair(AArch64reg reg0, AArch64reg reg1, RegType rty, int offset); + void AppendInstructionDeallocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty); + void AppendInstructionDeallocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty); + void GeneratePopRegs(); + void AppendJump(const MIRSymbol &func); + void GenerateEpilog(BB&); + void GenerateEpilogForCleanup(BB&); + Insn &CreateAndAppendInstructionForAllocateCallFrame(int64 argsToStkPassSize, AArch64reg reg0, AArch64reg reg1, + RegType rty); + Insn &AppendInstructionForAllocateOrDeallocateCallFrame(int64 argsToStkPassSize, AArch64reg reg0, AArch64reg reg1, + RegType rty, bool isAllocate); + static constexpr const int32 kOffset8MemPos = 8; + static constexpr const int32 kOffset16MemPos = 16; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_PROEPILOG_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_reaching.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_reaching.h new file mode 100644 index 0000000000000000000000000000000000000000..9aef3742368d54d653ad7c3d085713497bf4a2af --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_reaching.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_REACHING_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_REACHING_H + +#include "reaching.h" + +namespace maplebe { +class AArch64ReachingDefinition : public ReachingDefinition { + public: + AArch64ReachingDefinition(CGFunc &func, MemPool &memPool) : ReachingDefinition(func, memPool) {} + ~AArch64ReachingDefinition() override = default; + std::vector FindRegDefBetweenInsn(uint32 regNO, Insn *startInsn, Insn *endInsn) const final; + std::vector FindMemDefBetweenInsn(uint32 offset, const Insn *startInsn, Insn *endInsn) const final; + bool FindRegUseBetweenInsn(uint32 regNO, Insn *startInsn, Insn *endInsn, InsnSet &useInsnSet) const final; + bool FindMemUseBetweenInsn(uint32 offset, Insn *startInsn, const Insn *endInsn, + InsnSet &useInsnSet) const final; + InsnSet FindDefForRegOpnd(Insn &insn, uint32 indexOrRegNO, bool isRegNO = false) const final; + InsnSet FindDefForMemOpnd(Insn &insn, uint32 indexOrOffset, bool isOffset = false) const final; + InsnSet FindUseForMemOpnd(Insn &insn, uint8 index, bool secondMem = false) const final; + + protected: + void InitStartGen() final; + void InitEhDefine(BB &bb) final; + void InitGenUse(BB &bb, bool firstTime = true) final; + void GenAllCallerSavedRegs(BB &bb) final; + void AddRetPseudoInsn(BB &bb) final; + void AddRetPseudoInsns() final; + bool IsCallerSavedReg(uint32 regNO) const final; + void FindRegDefInBB(uint32 regNO, BB &bb, InsnSet &defInsnSet) const final; + void FindMemDefInBB(uint32 offset, BB &bb, InsnSet &defInsnSet) const final; + void DFSFindDefForRegOpnd(const BB &startBB, uint32 regNO, std::vector &visitedBB, + InsnSet &defInsnSet) const final; + void DFSFindDefForMemOpnd(const BB &startBB, uint32 offset, std::vector &visitedBB, + InsnSet &defInsnSet) const final; + int32 GetStackSize() const final; + + private: + void InitInfoForMemOperand(Insn &insn, Operand &opnd, bool isDef); + inline void InitInfoForListOpnd(const BB &bb, Operand &opnd); + inline void InitInfoForConditionCode(const BB &bb); + inline void InitInfoForRegOpnd(const BB &bb, Operand &opnd, bool isDef); + void InitMemInfoForClearStackCall(Insn &callInsn); + inline bool CallInsnClearDesignateStackRef(const Insn &callInsn, int64 offset) const; + int64 GetEachMemSizeOfPair(MOperator opCode) const; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_REACHING_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_reg_alloc.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_reg_alloc.h new file mode 100644 index 0000000000000000000000000000000000000000..f997b3a49dd1ae38d6f53c701de014a820887974 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_reg_alloc.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_REG_ALLOC_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_REG_ALLOC_H +#include "reg_alloc.h" +#include "riscv64_operand.h" +#include "riscv64_insn.h" +#include "riscv64_abi.h" + +namespace maplebe { +class AArch64RegAllocator : public RegAllocator { + public: + AArch64RegAllocator(CGFunc &cgFunc, MemPool &memPool) + : RegAllocator(cgFunc), + alloc(&memPool), + regMap(std::less(), alloc.Adapter()), + liveReg(std::less(), alloc.Adapter()), + allocatedSet(std::less(), alloc.Adapter()), + regLiveness(std::less(), alloc.Adapter()), + visitedBBs(alloc.Adapter()), + sortedBBs(alloc.Adapter()), + rememberRegs(alloc.Adapter()) { + for (int32 i = 0; i != kAllRegNum; i++) { + availRegSet[i] = false; + } + } + + ~AArch64RegAllocator() override = default; + + void InitAvailReg(); + bool AllocatePhysicalRegister(RegOperand &opnd); + void PreAllocate(); + void ReleaseReg(AArch64reg reg); + void ReleaseReg(RegOperand ®Opnd); + void GetPhysicalRegisterBank(RegType regType, uint8 &start, uint8 &end); + void AllocHandleCallee(Insn &insn, const AArch64MD &md); + bool IsYieldPointReg(AArch64reg regNO) const; + bool IsSpecialReg(AArch64reg reg) const; + bool IsUntouchableReg(uint32 regNO) const; + void SaveCalleeSavedReg(RegOperand &opnd); + + bool AllPredBBVisited(BB &bb) const; + BB *MarkStraightLineBBInBFS(BB*); + BB *SearchForStraightLineBBs(BB&); + void BFS(BB &bb); + void ComputeBlockOrder(); + + std::string PhaseName() const { + return "regalloc"; + } + + protected: + Operand *HandleRegOpnd(Operand &opnd); + Operand *HandleMemOpnd(Operand &opnd); + Operand *AllocSrcOpnd(Operand &opnd, OpndProp *opndProp = nullptr); + Operand *AllocDestOpnd(Operand &opnd, const Insn &insn); + + uint32 GetRegLivenessId(Operand *opnd); + void SetupRegLiveness(BB *bb); + + MapleAllocator alloc; + bool availRegSet[kAllRegNum]; + MapleMap regMap; /* virtual-register-to-physical-register map */ + MapleSet liveReg; /* a set of currently live physical registers */ + MapleSet allocatedSet; /* already allocated */ + MapleMap regLiveness; + MapleVector visitedBBs; + MapleVector sortedBBs; + MapleVector rememberRegs; +}; + +class DefaultO0RegAllocator : public AArch64RegAllocator { + public: + DefaultO0RegAllocator(CGFunc &cgFunc, MemPool &memPool) : AArch64RegAllocator(cgFunc, memPool) {} + + ~DefaultO0RegAllocator() override = default; + + bool AllocateRegisters() override; +}; +; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_REG_ALLOC_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_schedule.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_schedule.h new file mode 100644 index 0000000000000000000000000000000000000000..f6f9606485fc8f532a42a28191c266a119e407a0 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_schedule.h @@ -0,0 +1,175 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
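The allocator interface above keeps a virtual-to-physical map (regMap), a set of currently live physical registers (liveReg), and an availRegSet array that is scanned for a free register. A toy sketch of that bookkeeping with hypothetical types, far simpler than the real AArch64RegAllocator and its spill handling:

#include <cassert>
#include <map>
#include <set>

constexpr int kNumPhysRegs = 4;

struct ToyAllocator {
  bool avail[kNumPhysRegs] = {true, true, true, true};
  std::map<int, int> regMap;   // virtual register -> physical register
  std::set<int> liveReg;       // currently occupied physical registers

  int Alloc(int vreg) {
    auto it = regMap.find(vreg);
    if (it != regMap.end()) {
      return it->second;       // already assigned
    }
    for (int p = 0; p < kNumPhysRegs; ++p) {
      if (avail[p]) {
        avail[p] = false;
        liveReg.insert(p);
        regMap[vreg] = p;
        return p;
      }
    }
    return -1;                 // no free register: a real allocator would spill
  }

  void Release(int vreg) {
    int p = regMap.at(vreg);
    avail[p] = true;
    liveReg.erase(p);
  }
};

int main() {
  ToyAllocator ra;
  int p0 = ra.Alloc(100);
  int p1 = ra.Alloc(101);
  assert(p0 != p1);
  ra.Release(100);
  assert(ra.Alloc(102) == p0);  // a freed physical register is reused
  return 0;
}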
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_SCHEDULE_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_SCHEDULE_H + +#include "schedule.h" +#include "riscv64_operand.h" + +namespace maplebe { +enum RegisterType : uint8 { + kRegisterUndef, + kRegisterInt, + kRegisterFloat, + kRegisterCc, + kRegisterLast, +}; + +class ScheduleProcessInfo { + public: + explicit ScheduleProcessInfo(uint32 size) { + availableReadyList.reserve(size); + scheduledNodes.reserve(size); + } + + ~ScheduleProcessInfo() = default; + + uint32 GetLastUpdateCycle() const { + return lastUpdateCycle; + } + + void SetLastUpdateCycle(uint32 updateCycle) { + lastUpdateCycle = updateCycle; + } + + uint32 GetCurrCycle() const { + return currCycle; + } + + void IncCurrCycle() { + ++currCycle; + } + + void DecAdvanceCycle() { + advanceCycle--; + } + + uint32 GetAdvanceCycle() const { + return advanceCycle; + } + + void SetAdvanceCycle(uint32 cycle) { + advanceCycle = cycle; + } + + void ClearAvailableReadyList() { + availableReadyList.clear(); + } + + void PushElemIntoAvailableReadyList(DepNode *node) { + availableReadyList.emplace_back(node); + } + + size_t SizeOfAvailableReadyList() const { + return availableReadyList.size(); + } + + bool AvailableReadyListIsEmpty() const { + return availableReadyList.empty(); + } + + void SetAvailableReadyList(const std::vector &tempReadyList) { + availableReadyList = tempReadyList; + } + + const std::vector &GetAvailableReadyList() const { + return availableReadyList; + } + + const std::vector &GetAvailableReadyList() { + return availableReadyList; + } + + void PushElemIntoScheduledNodes(DepNode *node) { + node->SetState(kScheduled); + node->SetSchedCycle(currCycle); + node->OccupyUnits(); + scheduledNodes.emplace_back(node); + } + + bool IsFirstSeparator() const { + return isFirstSeparator; + } + + void ResetIsFirstSeparator() { + isFirstSeparator = false; + } + + size_t SizeOfScheduledNodes() const { + return scheduledNodes.size(); + } + + const std::vector &GetScheduledNodes() const { + return scheduledNodes; + } + + private: + std::vector availableReadyList; + std::vector scheduledNodes; + uint32 lastUpdateCycle = 0; + uint32 currCycle = 0; + uint32 advanceCycle = 0; + bool isFirstSeparator = true; +}; + +class AArch64Schedule : public Schedule { + public: + AArch64Schedule(CGFunc &func, MemPool &memPool, LiveAnalysis &live, const std::string &phaseName) + : Schedule(func, memPool, live, phaseName) {} + ~AArch64Schedule() override = default; + protected: + void DumpDepGraph(const MapleVector &nodes) const; + void DumpScheduleResult(const MapleVector &nodes, SimulateType type) const; + void GenerateDot(const BB &bb, const MapleVector &nodes) const; + + private: + void Init() override; + void MemoryAccessPairOpt() override; + void ClinitPairOpt() override; + void RegPressureScheduling(BB &bb, MapleVector &nd) override; + uint32 DoSchedule() override; + uint32 DoBruteForceSchedule() override; + uint32 SimulateOnly() override; + void UpdateBruteForceSchedCycle() override; + void IterateBruteForce(DepNode &targetNode, MapleVector &readyList, uint32 currCycle, + MapleVector &scheduledNodes, uint32 &maxCycleCount, + MapleVector 
&optimizedScheduledNodes) override; + void FindAndCombineMemoryAccessPair(const std::vector &readyList) override; + bool CanCombine(const Insn &insn) const override; + void ListScheduling(bool beforeRA) override; + void BruteForceScheduling(const BB &bb); + void SimulateScheduling(const BB &bb); + void FinalizeScheduling(BB &bb, const DepAnalysis &depAnalysis) override; + uint32 ComputeEstart(uint32 cycle) override; + void ComputeLstart(uint32 maxEstart) override; + void UpdateELStartsOnCycle(uint32 cycle) override; + void RandomTest() override; + void EraseNodeFromReadyList(const DepNode &target) override; + void EraseNodeFromNodeList(const DepNode &target, MapleVector &readyList) override; + uint32 GetNextSepIndex() const override; + void CountUnitKind(const DepNode &depNode, uint32 array[], const uint32 arraySize) const override; + static bool IfUseUnitKind(const DepNode &depNode, uint32 index); + void UpdateReadyList(DepNode &targetNode, MapleVector &readyList, bool updateEStart) override; + void UpdateScheduleProcessInfo(ScheduleProcessInfo &info); + bool CheckSchedulable(ScheduleProcessInfo &info) const; + void SelectNode(ScheduleProcessInfo &scheduleInfo); + static void DumpDebugInfo(const ScheduleProcessInfo &info); + static bool CompareDepNode(const DepNode &node1, const DepNode &node2); + void CalculateMaxUnitKindCount(ScheduleProcessInfo &scheduleInfo); + static uint32 maxUnitIndex; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_SCHEDULE_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_strldr.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_strldr.h new file mode 100644 index 0000000000000000000000000000000000000000..ab371e540c6629cd58f247c02800cd9cddd191a4 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_strldr.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_STRLDR_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_STRLDR_H + +#include "strldr.h" +#include "riscv64_reaching.h" + +namespace maplebe { +using namespace maple; + +class AArch64StoreLoadOpt : public StoreLoadOpt { + public: + AArch64StoreLoadOpt(CGFunc &func, MemPool &memPool) + : StoreLoadOpt(func, memPool), localAlloc(&memPool), str2MovMap(localAlloc.Adapter()) {} + ~AArch64StoreLoadOpt() override = default; + void Run() final; + void DoStoreLoadOpt(); + void DoLoadZeroToMoveTransfer(const Insn&, short, const InsnSet&) const; + void DoLoadToMoveTransfer(Insn&, short, short, const InsnSet&); + bool CheckStoreOpCode(MOperator opCode) const; + private: + void ProcessStrPair(Insn &insn); + void ProcessStr(Insn &insn); + void GenerateMoveLiveInsn(RegOperand &resRegOpnd, RegOperand &srcRegOpnd, + Insn &ldrInsn, Insn &strInsn, short memSeq); + void GenerateMoveDeadInsn(RegOperand &resRegOpnd, RegOperand &srcRegOpnd, + Insn &ldrInsn, Insn &strInsn, short memSeq); + MapleAllocator localAlloc; + /* the max number of mov insn to optimize. 
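DoLoadToMoveTransfer and DoLoadZeroToMoveTransfer declared above rewrite a load whose value is pinned down by an earlier store to the same [base + offset]: the ldr becomes a register move, or a move of zero when the store wrote the zero register. A simplified standalone sketch with hypothetical types, not Maple's Insn API or its reaching-definition checks:

#include <cassert>
#include <cstdint>
#include <optional>

struct MemRef {
  int baseReg;
  int64_t offset;
  bool operator==(const MemRef &o) const { return baseReg == o.baseReg && offset == o.offset; }
};

struct Store { int srcReg; bool srcIsZeroReg; MemRef mem; };
struct Load  { int dstReg; MemRef mem; };
struct Move  { int dstReg; int srcReg; bool srcIsZero; };

// When the store reaches the load with no intervening write to the same slot,
// the load collapses to a move from the stored register (or a move of zero).
std::optional<Move> LoadToMove(const Store &st, const Load &ld) {
  if (!(st.mem == ld.mem)) {
    return std::nullopt;
  }
  return Move{ld.dstReg, st.srcReg, st.srcIsZeroReg};
}

int main() {
  Store st{1, false, {29, 16}};   // str x1, [x29, #16]
  Load ld{2, {29, 16}};           // ldr x2, [x29, #16]
  auto mv = LoadToMove(st, ld);
  assert(mv && mv->dstReg == 2 && mv->srcReg == 1);  // becomes: mov x2, x1
  return 0;
}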
*/ + static constexpr uint8 kMaxMovNum = 2; + MapleMap str2MovMap; +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_STRLDR_H */ diff --git a/src/mapleall/maple_be/include/cg/riscv64/riscv64_yieldpoint.h b/src/mapleall/maple_be/include/cg/riscv64/riscv64_yieldpoint.h new file mode 100644 index 0000000000000000000000000000000000000000..d459a87094738feef02c58b3134e5edf90245ac8 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/riscv64/riscv64_yieldpoint.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_YIELDPOINT_H +#define MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_YIELDPOINT_H + +#include "yieldpoint.h" + +namespace maplebe { +using namespace maple; + +class AArch64YieldPointInsertion : public YieldPointInsertion { + public: + explicit AArch64YieldPointInsertion(CGFunc &func) : YieldPointInsertion(func) {} + + ~AArch64YieldPointInsertion() override = default; + + void Run() override; + + private: + void InsertYieldPoint(); +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_YIELDPOINT_H */ \ No newline at end of file diff --git a/src/mapleall/maple_be/src/ad/mad.cpp b/src/mapleall/maple_be/src/ad/mad.cpp index 238f6316525ca059ffaea8ede19adbcc785a4e6b..4ebb0076bb9bd0ede1bcc588614ffa0434f0ded9 100644 --- a/src/mapleall/maple_be/src/ad/mad.cpp +++ b/src/mapleall/maple_be/src/ad/mad.cpp @@ -14,7 +14,11 @@ */ #include "mad.h" #include +#if TARGAARCH64 #include "aarch64_operand.h" +#elif TARGRISCV64 +#include "riscv64_operand.h" +#endif #include "schedule.h" #include "insn.h" diff --git a/src/mapleall/maple_be/src/cg/args.cpp b/src/mapleall/maple_be/src/cg/args.cpp index e52886155d0eeebf7ee4dc8767e1f72a7d0bfd05..e1fef0fbb9a8aa8d8460a195b0b9804ce16e6c3c 100644 --- a/src/mapleall/maple_be/src/cg/args.cpp +++ b/src/mapleall/maple_be/src/cg/args.cpp @@ -15,6 +15,8 @@ #include "args.h" #if TARGAARCH64 #include "aarch64_args.h" +#elif TARGRISCV64 +#include "riscv64_args.h" #endif #if TARGARM32 #include "arm32_args.h" diff --git a/src/mapleall/maple_be/src/cg/cfgo.cpp b/src/mapleall/maple_be/src/cg/cfgo.cpp index 4dad1f4b99f5ff316010ad879ab716ce3864308b..b30958f480d6082f5a22459296ab5c057ea36c92 100644 --- a/src/mapleall/maple_be/src/cg/cfgo.cpp +++ b/src/mapleall/maple_be/src/cg/cfgo.cpp @@ -15,7 +15,11 @@ #include "cfgo.h" #include "cgbb.h" #include "cg.h" +#if TARGAARCH64 #include "aarch64_insn.h" +#elif TARGRISCV64 +#include "riscv64_insn.h" +#endif #include "mpl_logging.h" /* diff --git a/src/mapleall/maple_be/src/cg/cg_cfg.cpp b/src/mapleall/maple_be/src/cg/cg_cfg.cpp index d795028c5c5bf01d687d2cf84fbe79426abf8058..6191e38ddc84ac7657222facf444ff12e6a169d4 100644 --- a/src/mapleall/maple_be/src/cg/cg_cfg.cpp +++ b/src/mapleall/maple_be/src/cg/cg_cfg.cpp @@ -15,6 +15,8 @@ #include "cg_cfg.h" #if TARGAARCH64 #include "aarch64_insn.h" +#elif TARGRISCV64 +#include "riscv64_insn.h" #endif #if TARGARM32 #include "arm32_insn.h" diff --git 
a/src/mapleall/maple_be/src/cg/cg_phasemanager.cpp b/src/mapleall/maple_be/src/cg/cg_phasemanager.cpp index 00250713909f2a3bc85002d32edb8bc9b2085683..5b50618873ef5667b22669cb356f782a150cae34 100644 --- a/src/mapleall/maple_be/src/cg/cg_phasemanager.cpp +++ b/src/mapleall/maple_be/src/cg/cg_phasemanager.cpp @@ -24,7 +24,11 @@ #include "global.h" #include "strldr.h" #include "peep.h" +#if TARGAARCH64 #include "aarch64_fixshortbranch.h" +#elif TARGRISCV64 +#include "riscv64_fixshortbranch.h" +#endif #include "live.h" #include "loop.h" #include "mpl_timer.h" diff --git a/src/mapleall/maple_be/src/cg/ebo.cpp b/src/mapleall/maple_be/src/cg/ebo.cpp index bb93e6d7d98e9ee0a1baca4cff3be3beb36533f7..cd3bbc6a4f1412fa5e201175bfb1d826d42370e8 100644 --- a/src/mapleall/maple_be/src/cg/ebo.cpp +++ b/src/mapleall/maple_be/src/cg/ebo.cpp @@ -14,6 +14,8 @@ */ #if TARGAARCH64 #include "aarch64_ebo.h" +#elif TARGRISCV64 +#include "riscv64_ebo.h" #endif #if TARGARM32 #include "arm32_ebo.h" diff --git a/src/mapleall/maple_be/src/cg/global.cpp b/src/mapleall/maple_be/src/cg/global.cpp index 1493b075096c4cd1bfba671af418e3a507016c67..a2f09a71cfef6c8b225c4e566fd5d4a5e4f0c1b8 100644 --- a/src/mapleall/maple_be/src/cg/global.cpp +++ b/src/mapleall/maple_be/src/cg/global.cpp @@ -14,6 +14,8 @@ */ #if TARGAARCH64 #include "aarch64_global.h" +#elif TARGRISCV64 +#include "riscv64_global.h" #endif #if TARGARM32 #include "arm32_global.h" diff --git a/src/mapleall/maple_be/src/cg/ico.cpp b/src/mapleall/maple_be/src/cg/ico.cpp index 9b92a45b6ab5a714814a2f4a919d7b44da91c9b3..adaa4cd37db78a7cb2f264604de1bc4851268136 100644 --- a/src/mapleall/maple_be/src/cg/ico.cpp +++ b/src/mapleall/maple_be/src/cg/ico.cpp @@ -18,6 +18,10 @@ #include "aarch64_ico.h" #include "aarch64_isa.h" #include "aarch64_insn.h" +#elif TARGRISCV64 +#include "riscv64_ico.h" +#include "riscv64_isa.h" +#include "riscv64_insn.h" #elif TARGARM32 #include "arm32_ico.h" #include "arm32_isa.h" diff --git a/src/mapleall/maple_be/src/cg/live.cpp b/src/mapleall/maple_be/src/cg/live.cpp index f1da998956ef2e69f1229fac8bea0c9af9e7806a..364c59e195a3cbca37e3a9b747a9be6fc5161f93 100644 --- a/src/mapleall/maple_be/src/cg/live.cpp +++ b/src/mapleall/maple_be/src/cg/live.cpp @@ -16,6 +16,8 @@ #include #if TARGAARCH64 #include "aarch64_live.h" +#elif TARGRISCV64 +#include "riscv64_live.h" #endif #if TARGARM32 #include "arm32_live.h" diff --git a/src/mapleall/maple_be/src/cg/offset_adjust.cpp b/src/mapleall/maple_be/src/cg/offset_adjust.cpp index e0763773c47fadcea5271690d48d57fd68c11943..3d6e6d284c720e64f85519fdef880c1fe4056fbd 100644 --- a/src/mapleall/maple_be/src/cg/offset_adjust.cpp +++ b/src/mapleall/maple_be/src/cg/offset_adjust.cpp @@ -15,6 +15,8 @@ #include "offset_adjust.h" #if TARGAARCH64 #include "aarch64_offset_adjust.h" +#elif TARGRISCV64 +#include "riscv64_offset_adjust.h" #endif #if TARGARM32 #include "arm32_offset_adjust.h" diff --git a/src/mapleall/maple_be/src/cg/peep.cpp b/src/mapleall/maple_be/src/cg/peep.cpp index 991900287ba9fffd4dd82dad86b5748761877a82..4f3d8fde6d8e2d5d39ab51a194ba04495ee027de 100644 --- a/src/mapleall/maple_be/src/cg/peep.cpp +++ b/src/mapleall/maple_be/src/cg/peep.cpp @@ -18,6 +18,8 @@ #include "common_utils.h" #if TARGAARCH64 #include "aarch64_peep.h" +#elif TARGRISCV64 +#include "riscv64_peep.h" #endif #if TARGARM32 #include "arm32_peep.h" diff --git a/src/mapleall/maple_be/src/cg/pressure.cpp b/src/mapleall/maple_be/src/cg/pressure.cpp index 8fe05f3ba1311abba2e81b910725b9eaf2f69bdb..65369ba5707882185658aad3d6f7271b80210807 100644 --- 
a/src/mapleall/maple_be/src/cg/pressure.cpp +++ b/src/mapleall/maple_be/src/cg/pressure.cpp @@ -13,7 +13,11 @@ * See the Mulan PSL v2 for more details. */ #include "pressure.h" +#if TARGAARCH64 #include "aarch64_schedule.h" +#elif TARGRISCV64 +#include "riscv64_schedule.h" +#endif #include "deps.h" namespace maplebe { diff --git a/src/mapleall/maple_be/src/cg/proepilog.cpp b/src/mapleall/maple_be/src/cg/proepilog.cpp index b6184685631ce09542e82ea926f2bf8df33212d3..18d86af56c26129b36a87d5670bb303e95c50337 100644 --- a/src/mapleall/maple_be/src/cg/proepilog.cpp +++ b/src/mapleall/maple_be/src/cg/proepilog.cpp @@ -15,6 +15,8 @@ #include "proepilog.h" #if TARGAARCH64 #include "aarch64_proepilog.h" +#elif TARGRISCV64 +#include "riscv64_proepilog.h" #endif #if TARGARM32 #include "arm32_proepilog.h" diff --git a/src/mapleall/maple_be/src/cg/reaching.cpp b/src/mapleall/maple_be/src/cg/reaching.cpp index 7babdea784b09de3965aa2dc6dd9f825d43a20bb..92d6352213d9872e9e2b3cd2eadd0d5388b9b61f 100644 --- a/src/mapleall/maple_be/src/cg/reaching.cpp +++ b/src/mapleall/maple_be/src/cg/reaching.cpp @@ -14,6 +14,8 @@ */ #if TARGAARCH64 #include "aarch64_reaching.h" +#elif TARGRISCV64 +#include "riscv64_reaching.h" #endif #if TARGARM32 #include "arm32_reaching.h" diff --git a/src/mapleall/maple_be/src/cg/riscv64/mpl_atomic.cpp b/src/mapleall/maple_be/src/cg/riscv64/mpl_atomic.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e0fe5afe60a384b5b0f618fe6a477b6f5e802c85 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/mpl_atomic.cpp @@ -0,0 +1,72 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "mpl_atomic.h" +#include +#include "mpl_logging.h" + +namespace maple { +namespace { +constexpr int32 kMaxSizeOfTab = 6; +}; +MemOrd MemOrdFromU32(uint32 val) { + /* 6 is the size of tab below. 2 is memory_order_consume, it is Disabled. */ + CHECK_FATAL(val <= kMaxSizeOfTab, "Illegal number for MemOrd: %u", val); + CHECK_FATAL(val != 2, "Illegal number for MemOrd: %u", val); + static std::array tab = { + MemOrd::kNotAtomic, + MemOrd::memory_order_relaxed, + /* + * memory_order_consume Disabled. Its semantics is debatable. + * We don't support it now, but reserve the number. Use memory_order_acquire instead. 
+ */ + MemOrd::memory_order_acquire, /* padding entry */ + MemOrd::memory_order_acquire, + MemOrd::memory_order_release, + MemOrd::memory_order_acq_rel, + MemOrd::memory_order_seq_cst, + }; + return tab[val]; +} + +bool MemOrdIsAcquire(MemOrd ord) { + static std::array tab = { + false, /* kNotAtomic */ + false, /* memory_order_relaxed */ + true, /* memory_order_consume */ + true, /* memory_order_acquire */ + false, /* memory_order_release */ + true, /* memory_order_acq_rel */ + true, /* memory_order_seq_cst */ + }; + uint32 tabIndex = static_cast(ord); + CHECK_FATAL(tabIndex <= kMaxSizeOfTab, "Illegal number for MemOrd: %u", tabIndex); + return tab[tabIndex]; +} + +bool MemOrdIsRelease(MemOrd ord) { + static std::array tab = { + false, /* kNotAtomic */ + false, /* memory_order_relaxed */ + false, /* memory_order_consume */ + false, /* memory_order_acquire */ + true, /* memory_order_release */ + true, /* memory_order_acq_rel */ + true, /* memory_order_seq_cst */ + }; + uint32 tabIndex = static_cast(ord); + CHECK_FATAL(tabIndex <= kMaxSizeOfTab, "Illegal number for MemOrd: %u", tabIndex); + return tab[tabIndex]; +} +} /* namespace maple */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_abi.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_abi.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0862b2617429fa051d353dc3f6a0e5752f4be59e --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_abi.cpp @@ -0,0 +1,758 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
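The MemOrd helpers in mpl_atomic.cpp above map a numeric encoding (0 = not atomic, 1 = relaxed, 2 = consume, 3 = acquire, 4 = release, 5 = acq_rel, 6 = seq_cst) to acquire/release properties that a backend typically lowers to acquire loads and release stores (for example ldar/stlr-style accesses). A standalone illustration with std::atomic, not Maple's API; the numbering mirrors the table above and consume is rejected just as MemOrdFromU32 does:

#include <atomic>
#include <cassert>
#include <cstdint>
#include <optional>

std::optional<std::memory_order> OrderFromU32(uint32_t v) {
  switch (v) {
    case 1: return std::memory_order_relaxed;
    case 3: return std::memory_order_acquire;
    case 4: return std::memory_order_release;
    case 5: return std::memory_order_acq_rel;
    case 6: return std::memory_order_seq_cst;
    default: return std::nullopt;  // 0 (not atomic) and 2 (consume) are rejected
  }
}

int main() {
  std::atomic<int> flag{0};
  flag.store(1, *OrderFromU32(4));           // release store
  assert(flag.load(*OrderFromU32(3)) == 1);  // acquire load
  assert(!OrderFromU32(2));                  // consume is not handed out
  return 0;
}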
+ */ +#include "riscv64_cgfunc.h" +#include "becommon.h" + +namespace maplebe { +using namespace maple; + +namespace { +constexpr int kMaxRegCount = 2; + +/* + * return the class resulted from merging the two classes, based on rules + * described by the ARM ABI + */ +AArch64ArgumentClass MergeClasses(AArch64ArgumentClass class0, AArch64ArgumentClass class1) { + /* + * maybe return ( class0 | class 1 ) would do if + * ( class0 != kAArch64MemoryClass && class1 != kAArch64MemoryClass ) always hold + */ + if (class0 == class1) { + return class0; + } + if (class0 == kAArch64NoClass) { + return class1; + } + if (class1 == kAArch64NoClass) { + return class0; + } + if ((class0 == kAArch64MemoryClass) || (class1 == kAArch64MemoryClass)) { + return kAArch64MemoryClass; + } + if ((class0 == kAArch64IntegerClass) || (class1 == kAArch64IntegerClass)) { + return kAArch64IntegerClass; + } + ASSERT(false, "NYI"); + return kAArch64NoClass; +} + +int32 ProcessNonStructAndNonArrayWhenClassifyAggregate(const MIRType &mirType, + AArch64ArgumentClass classes[kMaxRegCount], + size_t classesLength) { + CHECK_FATAL(classesLength > 0, "classLength must > 0"); + /* scalar type */ + switch (mirType.GetPrimType()) { + case PTY_u1: + case PTY_u8: + case PTY_i8: + case PTY_u16: + case PTY_i16: + case PTY_a32: + case PTY_u32: + case PTY_i32: + case PTY_a64: + case PTY_ptr: + case PTY_ref: + case PTY_u64: + case PTY_i64: + classes[0] = kAArch64IntegerClass; + return 1; + case PTY_f32: + case PTY_f64: + case PTY_c64: + case PTY_c128: + classes[0] = kAArch64FloatClass; + return 1; + default: + CHECK_FATAL(false, "NYI"); + } + + /* should not reach to this point */ + return 0; +} + +void ProcessNonUnionWhenClassifyAggregate(const BECommon &be, const MIRType &fieldType, uint32 &fldBofst, + uint64 &allocedSize, uint64 &allocedSizeInBits) { + /* determine fld_bofst for this field */ + uint64 fieldTypeSize = be.GetTypeSize(fieldType.GetTypeIndex()); + ASSERT(fieldTypeSize != 0, "fieldTypeSize should not be 0"); + uint8 fieldAlign = be.GetTypeAlign(fieldType.GetTypeIndex()); + ASSERT(fieldAlign != 0, "fieldAlign should not be 0"); + if (fieldType.GetKind() == kTypeBitField) { + uint32 fieldSize = static_cast(fieldType).GetFieldSize(); + if ((allocedSizeInBits / (fieldAlign * k8ByteSize)) != + ((allocedSizeInBits + fieldSize - 1u) / (fieldAlign * k8ByteSize))) { + /* + * the field is crossing the align boundary of its base type; + * align alloced_size_in_bits to fieldAlign + */ + allocedSizeInBits = RoundUp(allocedSizeInBits, fieldAlign * k8ByteSize); + } + /* allocate the bitfield */ + fldBofst = allocedSizeInBits; + allocedSizeInBits += fieldSize; + allocedSize = std::max(allocedSize, RoundUp(allocedSizeInBits, fieldAlign * k8ByteSize) / k8ByteSize); + } else { + /* pad alloced_size according to the field alignment */ + allocedSize = RoundUp(allocedSize, fieldAlign); + fldBofst = allocedSize * k8ByteSize; + allocedSize += fieldTypeSize; + allocedSizeInBits = allocedSize * k8ByteSize; + } +} + +int32 ClassifyAggregate(BECommon &be, MIRType &mirType, AArch64ArgumentClass classes[kMaxRegCount], + size_t classesLength); + +void ProcessStructWhenClassifyAggregate(BECommon &be, MIRStructType &structType, int32 &subNumRegs, + AArch64ArgumentClass classes[kMaxRegCount], + size_t classesLength) { + CHECK_FATAL(classesLength > 0, "classLength must > 0"); + int32 sizeOfTyInDwords = RoundUp(be.GetTypeSize(structType.GetTypeIndex()), k8ByteSize) >> k8BitShift; + AArch64ArgumentClass subClasses[kMaxRegCount]; + uint32 fldBofst = 0; /* offset 
of field in bits within immediate struct */ + uint64 allocedSize = 0; + uint64 allocedSizeInBits = 0; + for (uint32 f = 0; f < structType.GetFieldsSize(); ++f) { + TyIdx fieldTyIdx = structType.GetFieldsElemt(f).second.first; + MIRType *fieldType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(fieldTyIdx); + subNumRegs = ClassifyAggregate(be, *fieldType, subClasses, sizeof(subClasses) / sizeof(AArch64ArgumentClass)); + ASSERT(subNumRegs > 0, "expect subNumRegs > 0"); /* we come here when the total size < 16? */ + if (subNumRegs == 0) { + return; + } + if (structType.GetKind() != kTypeUnion) { + ProcessNonUnionWhenClassifyAggregate(be, (*fieldType), fldBofst, allocedSize, allocedSizeInBits); + } else { + /* + * for unions, bitfields are treated as non-bitfields + * the parent aggregate is union, why are we increasing the alloced_size? + * this would alter the next field's bit offset? + */ + uint64 fieldTypeSize = be.GetTypeSize(fieldType->GetTypeIndex()); + ASSERT(fieldTypeSize != 0, "fieldTypeSize should not be 0"); + fldBofst = allocedSize * k8ByteSize; + allocedSize = std::max(allocedSize, fieldTypeSize); + } + /* merge subClasses into classes */ + int32 idx = fldBofst >> 6; /* index into the struct in doublewords */ + ASSERT(idx > 0, "expect idx > 0"); + ASSERT(idx < kMaxRegCount, "expect idx < kMaxRegCount"); + ASSERT(subNumRegs == 1, "subNumRegs should be equal to 1"); + ASSERT(subClasses[0] != kAArch64MemoryClass, "expect a kAArch64MemoryClass"); + for (int32 i = 0; i < subNumRegs; ++i) { + classes[i + idx] = MergeClasses(classes[i + idx], subClasses[i]); + } + } + if (subNumRegs < sizeOfTyInDwords) { + for (int32 i = 1; i < sizeOfTyInDwords; ++i) { + if (classes[i] == kAArch64NoClass) { + classes[i] = classes[i - 1]; + } + } + } +} + +void ProcessArrayWhenClassifyAggregate(BECommon &be, const MIRArrayType &mirArrayType, int32 &subNumRegs, + AArch64ArgumentClass classes[kMaxRegCount], size_t classesLength) { + CHECK_FATAL(classesLength > 0, "classLength must > 0"); + int32 sizeOfTyInDwords = RoundUp(be.GetTypeSize(mirArrayType.GetTypeIndex()), k8ByteSize) >> k8BitShift; + AArch64ArgumentClass subClasses[kMaxRegCount]; + subNumRegs = ClassifyAggregate(be, *(GlobalTables::GetTypeTable().GetTypeFromTyIdx(mirArrayType.GetElemTyIdx())), + subClasses, sizeof(subClasses) / sizeof(AArch64ArgumentClass)); + CHECK_FATAL(subNumRegs == 1, "subnumregs should be equal to 1"); + for (int32 i = 0; i < sizeOfTyInDwords; ++i) { + classes[i] = subClasses[i]; + } +} + +/* + * Analyze the given aggregate using the rules given by the ARM 64-bit ABI and + * return the number of doublewords to be passed in registers; the classes of + * the doublewords are returned in parameter "classes"; if 0 is returned, it + * means the whole aggregate is passed in memory. + */ +int32 ClassifyAggregate(BECommon &be, MIRType &mirType, AArch64ArgumentClass classes[kMaxRegCount], + size_t classesLength) { + CHECK_FATAL(classesLength > 0, "invalid index"); + uint64 sizeOfTy = be.GetTypeSize(mirType.GetTypeIndex()); + /* Rule B.3. + * If the argument type is a Composite Type that is larger than 16 bytes + * then the argument is copied to memory allocated by the caller and + * the argument is replaced by a pointer to the copy. + */ + if ((sizeOfTy > k16ByteSize) || (sizeOfTy == 0)) { + return 0; + } + + /* + * An argument of any Integer class takes up an integer register + * which is a single double-word. + * Rule B.4. The size of an argument of composite type is rounded up to the nearest + * multiple of 8 bytes. 
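Rules B.3 and B.4 quoted above decide whether a composite is passed in registers at all and, if so, how many doublewords it occupies. A small standalone check of just that size arithmetic (a deliberate simplification of ClassifyAggregate; the field-by-field class merging is ignored here):

#include <cassert>
#include <cstdint>

constexpr uint64_t kDwordSize = 8;

// Returns the number of doublewords passed in registers; 0 means "in memory".
int DwordsInRegs(uint64_t byteSize) {
  if (byteSize == 0 || byteSize > 16) {
    return 0;                                                          // Rule B.3
  }
  return static_cast<int>((byteSize + kDwordSize - 1) / kDwordSize);   // Rule B.4
}

int main() {
  assert(DwordsInRegs(4) == 1);    // e.g. struct { int32_t x; }
  assert(DwordsInRegs(12) == 2);   // e.g. struct { int64_t x; int32_t y; }
  assert(DwordsInRegs(24) == 0);   // larger than 16 bytes: caller copy, passed by pointer
  return 0;
}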
+ */ + int32 sizeOfTyInDwords = RoundUp(sizeOfTy, k8ByteSize) >> k8BitShift; + ASSERT(sizeOfTyInDwords > 0, "sizeOfTyInDwords should be sizeOfTyInDwords > 0"); + ASSERT(sizeOfTyInDwords <= kMaxRegCount, "sizeOfTyInDwords should be sizeOfTyInDwords <= kMaxRegCount"); + int32 i; + for (i = 0; i < sizeOfTyInDwords; ++i) { + classes[i] = kAArch64NoClass; + } + if ((mirType.GetKind() != kTypeStruct) && (mirType.GetKind() != kTypeArray) && (mirType.GetKind() != kTypeUnion)) { + return ProcessNonStructAndNonArrayWhenClassifyAggregate(mirType, classes, classesLength); + } + int32 subNumRegs; + if (mirType.GetKind() == kTypeStruct) { + MIRStructType &structType = static_cast(mirType); + ProcessStructWhenClassifyAggregate(be, structType, subNumRegs, classes, classesLength); + if (subNumRegs == 0) { + return 0; + } + } else { + /* mirType->_kind == TYPE_ARRAY */ + auto &mirArrayType = static_cast(mirType); + ProcessArrayWhenClassifyAggregate(be, mirArrayType, subNumRegs, classes, classesLength); + } + /* post merger clean-up */ + for (i = 0; i < sizeOfTyInDwords; ++i) { + if (classes[i] == kAArch64MemoryClass) { + return 0; + } + } + return sizeOfTyInDwords; +} +} + +namespace AArch64Abi { +bool IsAvailableReg(AArch64reg reg) { + switch (reg) { +/* integer registers */ +#define INT_REG(ID, PREF32, PREF64, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill) \ + case R##ID: \ + return canBeAssigned; +#define INT_REG_ALIAS(ALIAS, ID, PREF32, PREF64) +#include "riscv64_int_regs.def" +#undef INT_REG +#undef INT_REG_ALIAS +/* fp-simd registers */ +#define FP_SIMD_REG(ID, PV, P8, P16, P32, P64, P128, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill) \ + case V##ID: \ + return canBeAssigned; +#define FP_SIMD_REG_ALIAS(ID) +#include "riscv64_fp_simd_regs.def" +#undef FP_SIMD_REG +#undef FP_SIMD_REG_ALIAS + default: + return false; + } +} + +bool IsCalleeSavedReg(AArch64reg reg) { + switch (reg) { +/* integer registers */ +#define INT_REG(ID, PREF32, PREF64, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill) \ + case R##ID: \ + return isCalleeSave; +#define INT_REG_ALIAS(ALIAS, ID, PREF32, PREF64) +#include "riscv64_int_regs.def" +#undef INT_REG +#undef INT_REG_ALIAS +/* fp-simd registers */ +#define FP_SIMD_REG(ID, PV, P8, P16, P32, P64, P128, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill) \ + case V##ID: \ + return isCalleeSave; +#define FP_SIMD_REG_ALIAS(ID) +#include "riscv64_fp_simd_regs.def" +#undef FP_SIMD_REG +#undef FP_SIMD_REG_ALIAS + default: + return false; + } +} + +bool IsParamReg(AArch64reg reg) { + switch (reg) { +/* integer registers */ +#define INT_REG(ID, PREF32, PREF64, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill) \ + case R##ID: \ + return isParam; +#define INT_REG_ALIAS(ALIAS, ID, PREF32, PREF64) +#include "riscv64_int_regs.def" +#undef INT_REG +#undef INT_REG_ALIAS +/* fp-simd registers */ +#define FP_SIMD_REG(ID, PV, P8, P16, P32, P64, P128, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill) \ + case V##ID: \ + return isParam; +#define FP_SIMD_REG_ALIAS(ID) +#include "riscv64_fp_simd_regs.def" +#undef FP_SIMD_REG +#undef FP_SIMD_REG_ALIAS + default: + return false; + } +} + +bool IsSpillReg(AArch64reg reg) { + switch (reg) { +/* integer registers */ +#define INT_REG(ID, PREF32, PREF64, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill) \ + case R##ID: \ + return isSpill; +#define INT_REG_ALIAS(ALIAS, ID, PREF32, PREF64) +#include "riscv64_int_regs.def" +#undef INT_REG +#undef INT_REG_ALIAS +/* fp-simd 
registers */ +#define FP_SIMD_REG(ID, PV, P8, P16, P32, P64, P128, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill) \ + case V##ID: \ + return isSpill; +#define FP_SIMD_REG_ALIAS(ID) +#include "riscv64_fp_simd_regs.def" +#undef FP_SIMD_REG +#undef FP_SIMD_REG_ALIAS + default: + return false; + } +} + +bool IsExtraSpillReg(AArch64reg reg) { + switch (reg) { +/* integer registers */ +#define INT_REG(ID, PREF32, PREF64, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill) \ + case R##ID: \ + return isExtraSpill; +#define INT_REG_ALIAS(ALIAS, ID, PREF32, PREF64) +#include "riscv64_int_regs.def" +#undef INT_REG +#undef INT_REG_ALIAS +/* fp-simd registers */ +#define FP_SIMD_REG(ID, PV, P8, P16, P32, P64, P128, canBeAssigned, isCalleeSave, isParam, isSpill, isExtraSpill) \ + case V##ID: \ + return isExtraSpill; +#define FP_SIMD_REG_ALIAS(ID) +#include "riscv64_fp_simd_regs.def" +#undef FP_SIMD_REG +#undef FP_SIMD_REG_ALIAS + default: + return false; + } +} + +bool IsSpillRegInRA(AArch64reg regNO, bool has3RegOpnd) { + /* if has 3 RegOpnd, previous reg used to spill. */ + if (has3RegOpnd) { + return AArch64Abi::IsSpillReg(regNO) || AArch64Abi::IsExtraSpillReg(regNO); + } + return AArch64Abi::IsSpillReg(regNO); +} +} /* namespace AArch64Abi */ + +void ParmLocator::InitPLocInfo(PLocInfo &pLoc) const { + pLoc.reg0 = kRinvalid; + pLoc.reg1 = kRinvalid; + pLoc.memOffset = nextStackArgAdress; +} + +/* + * Refer to ARM IHI 0055C_beta: Procedure Call Standard for + * the ARM 64-bit Architecture. $5.4.2 + * + * For internal only functions, we may want to implement + * our own rules as Apple IOS has done. Maybe we want to + * generate two versions for each of externally visible functions, + * one conforming to the ARM standard ABI, and the other for + * internal only use. + * + * LocateNextParm should be called with each parameter in the parameter list + * starting from the beginning, one call per parameter in sequence; it returns + * the information on how each parameter is passed in pLoc + */ +int32 ParmLocator::LocateNextParm(MIRType &mirType, PLocInfo &pLoc, bool isFirst) { + InitPLocInfo(pLoc); + + if (isFirst) { + MIRFunction *func = const_cast(beCommon.GetMIRModule().CurFunction()); + if (beCommon.HasFuncReturnType(*func)) { + uint32 size = beCommon.GetTypeSize(beCommon.GetFuncReturnType(*func)); + if (size == 0) { + /* For return struct size 0 there is no return value. */ + return 0; + } else if (size > k16ByteSize) { + /* For return struct size > 16 bytes the pointer returns in x8. */ + pLoc.reg0 = R8; + return kSizeOfPtr; + } + /* For return struct size less or equal to 16 bytes, the values + * are returned in register pairs. Do nothing here. + */ + } + } + uint64 typeSize = beCommon.GetTypeSize(mirType.GetTypeIndex()); + if (typeSize == 0) { + return 0; + } + int32 typeAlign = beCommon.GetTypeAlign(mirType.GetTypeIndex()); + /* + * Rule C.12 states that we do round nextStackArgAdress up before we use its value + * according to the alignment requirement of the argument being processed. + * We do the rounding up at the end of LocateNextParm(), + * so we want to make sure our rounding up is correct. 
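+ *
+ * For example, an argument with 16-byte alignment can only start at an
+ * address that is a multiple of 16, so the assertion below checks
+ * (nextStackArgAdress & (alignment - 1)) == 0 before the offset is used.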
+ */ + ASSERT((nextStackArgAdress & (std::max(typeAlign, static_cast(k8ByteSize)) - 1)) == 0, + "C.12 alignment requirement is violated"); + pLoc.memSize = static_cast(typeSize); + ++paramNum; + + int32 aggCopySize = 0; + switch (mirType.GetPrimType()) { + case PTY_u1: + case PTY_u8: + case PTY_i8: + case PTY_u16: + case PTY_i16: + case PTY_a32: + case PTY_u32: + case PTY_i32: + case PTY_ptr: + case PTY_ref: + case PTY_a64: + case PTY_u64: + case PTY_i64: + /* Rule C.7 */ + typeSize = k8ByteSize; + pLoc.reg0 = AllocateGPRegister(); + ASSERT(nextGeneralRegNO <= AArch64Abi::kNumIntParmRegs, "RegNo should be pramRegNO"); + break; + /* + * for c64 complex numbers, we assume + * - callers marshall the two f32 numbers into one f64 register + * - callees de-marshall one f64 value into the real and the imaginery part + */ + case PTY_f32: + case PTY_f64: + case PTY_c64: + /* Rule C.1 */ + ASSERT(GetPrimTypeSize(PTY_f64) == k8ByteSize, "unexpected type size"); + typeSize = k8ByteSize; + pLoc.reg0 = AllocateSIMDFPRegister(); + break; + /* + * for c128 complex numbers, we assume + * - callers marshall the two f64 numbers into one f128 register + * - callees de-marshall one f128 value into the real and the imaginery part + */ + case PTY_c128: + /* SIMD-FP registers have 128-bits. */ + pLoc.reg0 = AllocateSIMDFPRegister(); + ASSERT(nextFloatRegNO <= AArch64Abi::kNumFloatParmRegs, "regNO should not be greater than kNumFloatParmRegs"); + ASSERT(typeSize == k16ByteSize, "unexpected type size"); + break; + /* + * case of quad-word integer: + * we don't support java yet. + * if (has-16-byte-alignment-requirement) + * nextGeneralRegNO = (nextGeneralRegNO+1) & ~1; // C.8 round it up to the next even number + * try allocate two consecutive registers at once. + */ + /* case PTY_agg */ + case PTY_agg: { + aggCopySize = ProcessPtyAggWhenLocateNextParm(mirType, pLoc, typeSize, typeAlign); + break; + } + default: + CHECK_FATAL(false, "NYI"); + } + + /* Rule C.12 */ + if (pLoc.reg0 == kRinvalid) { + /* being passed in memory */ + nextStackArgAdress = pLoc.memOffset + typeSize; + } + return aggCopySize; +} + +int32 ParmLocator::ProcessPtyAggWhenLocateNextParm(MIRType &mirType, PLocInfo &pLoc, uint64 &typeSize, + int32 typeAlign) { + /* + * In AArch64, integer-float or float-integer + * argument passing is not allowed. All should go through + * integer-integer. + */ + AArch64ArgumentClass classes[kMaxRegCount] = { kAArch64NoClass }; +#if DEBUG + int32 saveIntParmNum = nextGeneralRegNO; + int32 saveFloatParmNum = nextFloatRegNO; +#endif + typeSize = beCommon.GetTypeSize(mirType.GetTypeIndex().GetIdx()); + int32 aggCopySize = 0; + if (typeSize > k16ByteSize) { + aggCopySize = RoundUp(typeSize, kSizeOfPtr); + } + /* + * alignment requirement + * Note. This is one of a few things iOS diverges from + * the ARM 64-bit standard. They don't observe the round-up requirement. 
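+ *
+ * For example, if three integer parameter registers are already in use and
+ * the aggregate requires 16-byte alignment, Rule C.8 rounds the next
+ * general register number up from 3 to 4 so that an even/odd register pair
+ * can be allocated.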
+ */ + if (typeAlign == k16ByteSize) { + RoundNGRNUpToNextEven(); + } + + int32 numRegs = ClassifyAggregate(beCommon, mirType, classes, sizeof(classes) / sizeof(AArch64ArgumentClass)); + if (numRegs == 1) { + /* passing in registers */ + typeSize = k8ByteSize; + if (classes[0] == kAArch64FloatClass) { + pLoc.reg0 = AllocateSIMDFPRegister(); + ASSERT(nextFloatRegNO == saveFloatParmNum, "RegNo should be saved pramRegNO"); + } else { + pLoc.reg0 = AllocateGPRegister(); + ASSERT(nextGeneralRegNO == saveIntParmNum, "RegNo should be saved pramRegNO"); + /* Rule C.11 */ + ASSERT((pLoc.reg0 != kRinvalid) || (nextGeneralRegNO == AArch64Abi::kNumIntParmRegs), + "reg0 should not be kRinvalid or nextGeneralRegNO should equal kNumIntParmRegs"); + } + } else if (numRegs == kMaxRegCount) { + ASSERT(classes[0] == kAArch64IntegerClass, "class 0 must be integer class"); + ASSERT(classes[1] == kAArch64IntegerClass, "class 1 must be integer class"); + AllocateTwoGPRegisters(pLoc); + /* Rule C.11 */ + if (pLoc.reg0 == kRinvalid) { + nextGeneralRegNO = AArch64Abi::kNumIntParmRegs; + } + } else { + /* + * 0 returned from ClassifyAggregate(). This means the whole data + * is passed thru memory. + * Rule B.3. + * If the argument type is a Composite Type that is larger than 16 + * bytes then the argument is copied to memory allocated by the + * caller and the argument is replaced by a pointer to the copy. + * + * Try to allocate an integer register + */ + typeSize = k8ByteSize; + pLoc.reg0 = AllocateGPRegister(); + pLoc.memSize = k8ByteSize; /* byte size of a pointer in AArch64 */ + if (pLoc.reg0 != kRinvalid) { + numRegs = 1; + } + } + /* compute rightpad */ + if ((numRegs == 0) || (pLoc.reg0 == kRinvalid)) { + /* passed in memory */ + typeSize = RoundUp(pLoc.memSize, k8ByteSize); + } + return aggCopySize; +} + +/* + * instantiated with the type of the function return value, it describes how + * the return value is to be passed back to the caller + * + * Refer to ARM IHI 0055C_beta: Procedure Call Standard for + * the ARM 64-bit Architecture. $5.5 + * "If the type, T, of the result of a function is such that + * void func(T arg) + * would require that 'arg' be passed as a value in a register + * (or set of registers) according to the rules in $5.4 Parameter + * Passing, then the result is returned in the same registers + * as would be used for such an argument. + */ +ReturnMechanism::ReturnMechanism(MIRType &retTy, BECommon &be) + : regCount(0), reg0(kRinvalid), reg1(kRinvalid), primTypeOfReg0(kPtyInvalid), primTypeOfReg1(kPtyInvalid) { + PrimType pType = retTy.GetPrimType(); + switch (pType) { + case PTY_void: + break; + case PTY_u1: + case PTY_u8: + case PTY_i8: + case PTY_u16: + case PTY_i16: + case PTY_a32: + case PTY_u32: + case PTY_i32: + regCount = 1; + reg0 = AArch64Abi::intReturnRegs[0]; + primTypeOfReg0 = IsSignedInteger(pType) ? PTY_i32 : PTY_u32; /* promote the type */ + return; + + case PTY_ptr: + case PTY_ref: + CHECK_FATAL(false, "PTY_ptr should have been lowered"); + return; + + case PTY_a64: + case PTY_u64: + case PTY_i64: + regCount = 1; + reg0 = AArch64Abi::intReturnRegs[0]; + primTypeOfReg0 = IsSignedInteger(pType) ? 
PTY_i64 : PTY_u64; /* promote the type */ + return; + + /* + * for c64 complex numbers, we assume + * - callers marshall the two f32 numbers into one f64 register + * - callees de-marshall one f64 value into the real and the imaginery part + */ + case PTY_f32: + case PTY_f64: + case PTY_c64: + + /* + * for c128 complex numbers, we assume + * - callers marshall the two f64 numbers into one f128 register + * - callees de-marshall one f128 value into the real and the imaginery part + */ + case PTY_c128: + regCount = 1; + reg0 = AArch64Abi::floatReturnRegs[0]; + primTypeOfReg0 = pType; + return; + + /* + * Refer to ARM IHI 0055C_beta: Procedure Call Standard for + * the ARM 64-bit Architecture. $5.5 + * "Otherwise, the caller shall reserve a block of memory of + * sufficient size and alignment to hold the result. The + * address of the memory block shall be passed as an additional + * argument to the function in x8. The callee may modify the + * result memory block at any point during the execution of the + * subroutine (there is no requirement for the callee to preserve + * the value stored in x8)." + */ + case PTY_agg: { + uint64 size = be.GetTypeSize(retTy.GetTypeIndex()); + if ((size > k16ByteSize) || (size == 0)) { + /* + * The return value is returned via memory. + * The address is in X8 and passed by the caller. + */ + SetupToReturnThroughMemory(); + return; + } + AArch64ArgumentClass classes[kMaxRegCount]; + regCount = static_cast(ClassifyAggregate(be, retTy, classes, + sizeof(classes) / sizeof(AArch64ArgumentClass))); + if (regCount == 0) { + SetupToReturnThroughMemory(); + return; + } else { + if (regCount == 1) { + /* passing in registers */ + if (classes[0] == kAArch64FloatClass) { + reg0 = AArch64Abi::floatReturnRegs[0]; + primTypeOfReg0 = PTY_f64; + } else { + reg0 = AArch64Abi::intReturnRegs[0]; + primTypeOfReg0 = PTY_i64; + } + } else { + ASSERT(regCount == kMaxRegCount, "reg count from ClassifyAggregate() should be 0, 1, or 2"); + ASSERT(classes[0] == kAArch64IntegerClass, "error val :classes[0]"); + ASSERT(classes[1] == kAArch64IntegerClass, "error val :classes[1]"); + reg0 = AArch64Abi::intReturnRegs[0]; + primTypeOfReg0 = PTY_i64; + reg1 = AArch64Abi::intReturnRegs[1]; + primTypeOfReg1 = PTY_i64; + } + return; + } + } + default: + CHECK_FATAL(false, "NYI"); + } +} + +void ReturnMechanism::SetupSecondRetReg(const MIRType &retTy2) { + ASSERT(reg1 == kRinvalid, "make sure reg1 equal kRinvalid"); + PrimType pType = retTy2.GetPrimType(); + switch (pType) { + case PTY_void: + break; + case PTY_u1: + case PTY_u8: + case PTY_i8: + case PTY_u16: + case PTY_i16: + case PTY_a32: + case PTY_u32: + case PTY_i32: + case PTY_ptr: + case PTY_ref: + case PTY_a64: + case PTY_u64: + case PTY_i64: + reg1 = AArch64Abi::intReturnRegs[1]; + primTypeOfReg1 = IsSignedInteger(pType) ? PTY_i64 : PTY_u64; /* promote the type */ + break; + default: + CHECK_FATAL(false, "NYI"); + } +} + +/* + * From "ARM Procedure Call Standard for ARM 64-bit Architecture" + * ARM IHI 0055C_beta, 6th November 2013 + * $ 5.1 machine Registers + * $ 5.1.1 General-Purpose Registers + * Note + * SP Stack Pointer + * R30/LR Link register Stores the return address. + * We push it into stack along with FP on function + * entry using STP and restore it on function exit + * using LDP even if the function is a leaf (i.e., + * it does not call any other function) because it + * is free (we have to store FP anyway). So, if a + * function is a leaf, we may use it as a temporary + * register. 
+ * R29/FP Frame Pointer + * R19-R28 Callee-saved + * registers + * R18 Platform reg Can we use it as a temporary register? + * R16,R17 IP0,IP1 Maybe used as temporary registers. Should be + * given lower priorities. (i.e., we push them + * into the free register stack before the others) + * R9-R15 Temporary registers, caller-saved + * Note: + * R16 and R17 may be used by a linker as a scratch register between + * a routine and any subroutine it calls. They can also be used within a + * routine to hold intermediate values between subroutine calls. + * + * The role of R18 is platform specific. If a platform ABI has need of + * a dedicated general purpose register to carry inter-procedural state + * (for example, the thread context) then it should use this register for + * that purpose. If the platform ABI has no such requirements, then it should + * use R18 as an additional temporary register. The platform ABI specification + * must document the usage for this register. + * + * A subroutine invocation must preserve the contents of the registers R19-R29 + * and SP. All 64 bits of each value stored in R19-R29 must be preserved, even + * when using the ILP32 data model. + * + * $ 5.1.2 SIMD and Floating-Point Registers + * + * The first eight registers, V0-V7, are used to pass argument values into + * a subroutine and to return result values from a function. They may also + * be used to hold intermediate values within a routine. + * + * V8-V15 must be preserved by a callee across subroutine calls; the + * remaining registers do not need to be preserved( or caller-saved). + * Additionally, only the bottom 64 bits of each value stored in V8- + * V15 need to be preserved. + */ +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_args.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_args.cpp new file mode 100644 index 0000000000000000000000000000000000000000..44c54f62168495290e4191b104dff2d6babfd68d --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_args.cpp @@ -0,0 +1,306 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#include "riscv64_args.h" +#include +#include "riscv64_cgfunc.h" + +namespace maplebe { +using namespace maple; + +void AArch64MoveRegArgs::Run() { + MoveVRegisterArgs(); + MoveRegisterArgs(); +} + +void AArch64MoveRegArgs::CollectRegisterArgs(std::map &argsList, + std::vector &indexList) const { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + ParmLocator parmlocator(aarchCGFunc->GetBecommon()); + PLocInfo ploc; + for (uint32 i = 0; i < aarchCGFunc->GetFunction().GetFormalCount(); ++i) { + MIRType *ty = aarchCGFunc->GetFunction().GetNthParamType(i); + parmlocator.LocateNextParm(*ty, ploc, i == 0); + if (ploc.reg0 == kRinvalid) { + continue; + } + aarchCGFunc->PushElemIntoFormalRegList(ploc.reg0); + MIRSymbol *sym = aarchCGFunc->GetFunction().GetFormal(i); + if (sym->IsPreg()) { + continue; + } + argsList[i] = ploc.reg0; + indexList.emplace_back(i); + if (ploc.reg1 == kRinvalid) { + continue; + } + aarchCGFunc->PushElemIntoFormalRegList(ploc.reg1); + } +} + +ArgInfo AArch64MoveRegArgs::GetArgInfo(std::map &argsList, uint32 argIndex) const { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + ArgInfo argInfo; + argInfo.reg = argsList[argIndex]; + argInfo.mirTy = aarchCGFunc->GetFunction().GetNthParamType(argIndex); + argInfo.symSize = aarchCGFunc->GetBecommon().GetTypeSize(argInfo.mirTy->GetTypeIndex()); + argInfo.stkSize = (argInfo.symSize < k4ByteSize) ? k4ByteSize : argInfo.symSize; + argInfo.regType = (argInfo.reg < V0) ? kRegTyInt : kRegTyFloat; + argInfo.sym = aarchCGFunc->GetFunction().GetFormal(argIndex); + CHECK_NULL_FATAL(argInfo.sym); + argInfo.symLoc = + static_cast(aarchCGFunc->GetMemlayout()->GetSymAllocInfo(argInfo.sym->GetStIndex())); + CHECK_NULL_FATAL(argInfo.symLoc); + return argInfo; +} + +bool AArch64MoveRegArgs::IsInSameSegment(const ArgInfo &firstArgInfo, const ArgInfo &secondArgInfo) const { + if (firstArgInfo.symLoc->GetMemSegment() != secondArgInfo.symLoc->GetMemSegment()) { + return false; + } + if (firstArgInfo.symSize != secondArgInfo.symSize) { + return false; + } + if (firstArgInfo.symSize != k4ByteSize && firstArgInfo.symSize != k8ByteSize) { + return false; + } + if (firstArgInfo.regType != secondArgInfo.regType) { + return false; + } + return firstArgInfo.symLoc->GetOffset() + firstArgInfo.stkSize == secondArgInfo.symLoc->GetOffset(); +} + +void AArch64MoveRegArgs::GenerateStpInsn(const ArgInfo &firstArgInfo, const ArgInfo &secondArgInfo) { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + AArch64RegOperand *baseOpnd = static_cast(aarchCGFunc->GetBaseReg(*firstArgInfo.symLoc)); + RegOperand ®Opnd = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(firstArgInfo.reg, + firstArgInfo.stkSize * kBitsPerByte, + firstArgInfo.regType); + MOperator mOp = firstArgInfo.regType == kRegTyInt ? ((firstArgInfo.stkSize > k4ByteSize) ? MOP_xstp : MOP_wstp) + : ((firstArgInfo.stkSize > k4ByteSize) ? MOP_dstp : MOP_sstp); + RegOperand ®Opnd2 = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(secondArgInfo.reg, + firstArgInfo.stkSize * kBitsPerByte, + firstArgInfo.regType); + + int32 limit = (secondArgInfo.stkSize > k4ByteSize) ? 
kStpLdpImm64UpperBound : kStpLdpImm32UpperBound; + int32 stOffset = aarchCGFunc->GetBaseOffset(*firstArgInfo.symLoc); + MemOperand *memOpnd = nullptr; + if (stOffset > limit || baseReg != nullptr) { + if (baseReg == nullptr || lastSegment != firstArgInfo.symLoc->GetMemSegment()) { + AArch64ImmOperand &immOpnd = + aarchCGFunc->CreateImmOperand(stOffset - firstArgInfo.symLoc->GetOffset(), k64BitSize, false); + baseReg = &aarchCGFunc->CreateRegisterOperandOfType(kRegTyInt, k8ByteSize); + lastSegment = firstArgInfo.symLoc->GetMemSegment(); + aarchCGFunc->SelectAdd(*baseReg, *baseOpnd, immOpnd, PTY_a64); + } + AArch64OfstOperand &offsetOpnd = aarchCGFunc->CreateOfstOpnd(firstArgInfo.symLoc->GetOffset(), k32BitSize); + memOpnd = aarchCGFunc->GetMemoryPool()->New(AArch64MemOperand::kAddrModeBOi, + firstArgInfo.stkSize * kBitsPerByte, + *baseReg, nullptr, &offsetOpnd, firstArgInfo.sym); + } else { + AArch64OfstOperand &offsetOpnd = aarchCGFunc->CreateOfstOpnd(stOffset, k32BitSize); + memOpnd = aarchCGFunc->GetMemoryPool()->New(AArch64MemOperand::kAddrModeBOi, + firstArgInfo.stkSize * kBitsPerByte, + *baseOpnd, nullptr, &offsetOpnd, firstArgInfo.sym); + } + Insn &pushInsn = aarchCGFunc->GetCG()->BuildInstruction(mOp, regOpnd, regOpnd2, *memOpnd); + if (aarchCGFunc->GetCG()->GenerateVerboseCG()) { + std::string argName = firstArgInfo.sym->GetName() + " " + secondArgInfo.sym->GetName(); + pushInsn.SetComment(std::string("store param: ").append(argName)); + } + aarchCGFunc->GetCurBB()->AppendInsn(pushInsn); +} + +void AArch64MoveRegArgs::GenerateStrInsn(ArgInfo &argInfo) { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + int32 stOffset = aarchCGFunc->GetBaseOffset(*argInfo.symLoc); + AArch64RegOperand *baseOpnd = static_cast(aarchCGFunc->GetBaseReg(*argInfo.symLoc)); + RegOperand ®Opnd = + aarchCGFunc->GetOrCreatePhysicalRegisterOperand(argInfo.reg, argInfo.stkSize * kBitsPerByte, argInfo.regType); + MemOperand *memOpnd = nullptr; + if (AArch64MemOperand::IsPIMMOffsetOutOfRange(stOffset, argInfo.symSize * kBitsPerByte) || + (baseReg != nullptr && (lastSegment == argInfo.symLoc->GetMemSegment()))) { + if (baseReg == nullptr || lastSegment != argInfo.symLoc->GetMemSegment()) { + AArch64ImmOperand &immOpnd = aarchCGFunc->CreateImmOperand(stOffset - argInfo.symLoc->GetOffset(), k64BitSize, + false); + baseReg = &aarchCGFunc->CreateRegisterOperandOfType(kRegTyInt, k8ByteSize); + lastSegment = argInfo.symLoc->GetMemSegment(); + aarchCGFunc->SelectAdd(*baseReg, *baseOpnd, immOpnd, PTY_a64); + } + AArch64OfstOperand &offsetOpnd = aarchCGFunc->CreateOfstOpnd(argInfo.symLoc->GetOffset(), k32BitSize); + memOpnd = aarchCGFunc->GetMemoryPool()->New(AArch64MemOperand::kAddrModeBOi, + argInfo.symSize * kBitsPerByte, *baseReg, + nullptr, &offsetOpnd, argInfo.sym); + } else { + AArch64OfstOperand &offsetOpnd = aarchCGFunc->CreateOfstOpnd(stOffset, k32BitSize); + memOpnd = aarchCGFunc->GetMemoryPool()->New(AArch64MemOperand::kAddrModeBOi, + argInfo.symSize * kBitsPerByte, *baseOpnd, + nullptr, &offsetOpnd, argInfo.sym); + } + + MOperator mOp = aarchCGFunc->PickStInsn(argInfo.symSize * kBitsPerByte, argInfo.mirTy->GetPrimType()); + Insn &insn = aarchCGFunc->GetCG()->BuildInstruction(mOp, regOpnd, *memOpnd); + if (aarchCGFunc->GetCG()->GenerateVerboseCG()) { + insn.SetComment(std::string("store param: ").append(argInfo.sym->GetName())); + } + aarchCGFunc->GetCurBB()->AppendInsn(insn); +} + +void AArch64MoveRegArgs::MoveRegisterArgs() { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + BB *formerCurBB = 
aarchCGFunc->GetCurBB(); + aarchCGFunc->GetDummyBB()->ClearInsns(); + aarchCGFunc->SetCurBB(*aarchCGFunc->GetDummyBB()); + + std::map movePara; + std::vector moveParaIndex; + CollectRegisterArgs(movePara, moveParaIndex); + + std::vector::iterator it; + std::vector::iterator next; + for (it = moveParaIndex.begin(); it != moveParaIndex.end(); ++it) { + uint32 firstIndex = *it; + ArgInfo firstArgInfo = GetArgInfo(movePara, firstIndex); + next = it; + ++next; + if (next != moveParaIndex.end()) { + uint32 secondIndex = *next; + ArgInfo secondArgInfo = GetArgInfo(movePara, secondIndex); + /* Make sure they are in same segment if want to use stp */ + if (IsInSameSegment(firstArgInfo, secondArgInfo)) { + GenerateStpInsn(firstArgInfo, secondArgInfo); + it = next; + continue; + } + } + GenerateStrInsn(firstArgInfo); + } + + aarchCGFunc->GetFirstBB()->InsertAtBeginning(*aarchCGFunc->GetDummyBB()); + aarchCGFunc->SetCurBB(*formerCurBB); +} + +void AArch64MoveRegArgs::MoveLocalRefVarToRefLocals(MIRSymbol &mirSym) { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + PrimType stype = mirSym.GetType()->GetPrimType(); + uint32 byteSize = GetPrimTypeSize(stype); + uint32 bitSize = byteSize * kBitsPerByte; + MemOperand &memOpnd = aarchCGFunc->GetOrCreateMemOpnd(mirSym, 0, bitSize, true); + RegOperand *regOpnd = nullptr; + if (mirSym.IsPreg()) { + PregIdx pregIdx = aarchCGFunc->GetFunction().GetPregTab()->GetPregIdxFromPregno(mirSym.GetPreg()->GetPregNo()); + regOpnd = &aarchCGFunc->GetOrCreateVirtualRegisterOperand(aarchCGFunc->GetVirtualRegNOFromPseudoRegIdx(pregIdx)); + } else { + regOpnd = &aarchCGFunc->GetOrCreateVirtualRegisterOperand(aarchCGFunc->NewVReg(kRegTyInt, k8ByteSize)); + } + Insn &insn = aarchCGFunc->GetCG()->BuildInstruction( + aarchCGFunc->PickLdInsn(GetPrimTypeBitSize(stype), stype), *regOpnd, memOpnd); + MemOperand &memOpnd1 = aarchCGFunc->GetOrCreateMemOpnd(mirSym, 0, bitSize, false); + Insn &insn1 = aarchCGFunc->GetCG()->BuildInstruction( + aarchCGFunc->PickStInsn(GetPrimTypeBitSize(stype), stype), *regOpnd, memOpnd1); + aarchCGFunc->GetCurBB()->InsertInsnBegin(insn1); + aarchCGFunc->GetCurBB()->InsertInsnBegin(insn); +} + + +void AArch64MoveRegArgs::LoadStackArgsToVReg(MIRSymbol &mirSym) { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + PrimType stype = mirSym.GetType()->GetPrimType(); + uint32 byteSize = GetPrimTypeSize(stype); + uint32 bitSize = byteSize * kBitsPerByte; + MemOperand &memOpnd = aarchCGFunc->GetOrCreateMemOpnd(mirSym, 0, bitSize); + PregIdx pregIdx = aarchCGFunc->GetFunction().GetPregTab()->GetPregIdxFromPregno(mirSym.GetPreg()->GetPregNo()); + RegOperand &dstRegOpnd = aarchCGFunc->GetOrCreateVirtualRegisterOperand( + aarchCGFunc->GetVirtualRegNOFromPseudoRegIdx(pregIdx)); + Insn &insn = aarchCGFunc->GetCG()->BuildInstruction( + aarchCGFunc->PickLdInsn(GetPrimTypeBitSize(stype), stype), dstRegOpnd, memOpnd); + + if (aarchCGFunc->GetCG()->GenerateVerboseCG()) { + std::string key = "param: %%"; + key += std::to_string(mirSym.GetPreg()->GetPregNo()); + ASSERT(mirSym.GetStorageClass() == kScFormal, "vreg parameters should be kScFormal type."); + insn.SetComment(key); + } + + aarchCGFunc->GetCurBB()->InsertInsnBegin(insn); +} + +void AArch64MoveRegArgs::MoveArgsToVReg(const PLocInfo &ploc, MIRSymbol &mirSym) { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + RegType regType = (ploc.reg0 < V0) ? 
kRegTyInt : kRegTyFloat; + PrimType stype = mirSym.GetType()->GetPrimType(); + uint32 byteSize = GetPrimTypeSize(stype); + uint32 srcBitSize = ((byteSize < k4ByteSize) ? k4ByteSize : byteSize) * kBitsPerByte; + PregIdx pregIdx = aarchCGFunc->GetFunction().GetPregTab()->GetPregIdxFromPregno(mirSym.GetPreg()->GetPregNo()); + RegOperand &dstRegOpnd = + aarchCGFunc->GetOrCreateVirtualRegisterOperand(aarchCGFunc->GetVirtualRegNOFromPseudoRegIdx(pregIdx)); + dstRegOpnd.SetSize(srcBitSize); + RegOperand &srcRegOpnd = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(ploc.reg0, srcBitSize, regType); + ASSERT(mirSym.GetStorageClass() == kScFormal, "should be args"); + MOperator mOp = aarchCGFunc->PickMovInsn(srcBitSize, regType); + + Insn &insn = aarchCGFunc->GetCG()->BuildInstruction(mOp, dstRegOpnd, srcRegOpnd); + if (aarchCGFunc->GetCG()->GenerateVerboseCG()) { + std::string key = "param: %%"; + key += std::to_string(mirSym.GetPreg()->GetPregNo()); + insn.SetComment(key); + } + aarchCGFunc->GetCurBB()->InsertInsnBegin(insn); +} + +void AArch64MoveRegArgs::MoveVRegisterArgs() { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + BB *formerCurBB = aarchCGFunc->GetCurBB(); + aarchCGFunc->GetDummyBB()->ClearInsns(); + aarchCGFunc->SetCurBB(*aarchCGFunc->GetDummyBB()); + ParmLocator parmlocator(aarchCGFunc->GetBecommon()); + PLocInfo ploc; + + uint32 formalCount = static_cast(aarchCGFunc->GetFunction().GetFormalCount()); + uint32 start = 0; + if (formalCount) { + MIRFunction *func = const_cast(aarchCGFunc->GetBecommon().GetMIRModule().CurFunction()); + if (aarchCGFunc->GetBecommon().HasFuncReturnType(*func)) { + TyIdx idx = aarchCGFunc->GetBecommon().GetFuncReturnType(*func); + if (aarchCGFunc->GetBecommon().GetTypeSize(idx) <= k16BitSize) { + start = 1; + } + } + } + for (uint32 i = start; i < formalCount; ++i) { + MIRType *ty = aarchCGFunc->GetFunction().GetNthParamType(i); + parmlocator.LocateNextParm(*ty, ploc, i == 0); + MIRSymbol *sym = aarchCGFunc->GetFunction().GetFormal(i); + + /* load locarefvar formals to store in the reflocals. */ + if (aarchCGFunc->GetFunction().GetNthParamAttr(i).GetAttr(ATTR_localrefvar) && ploc.reg0 == kRinvalid) { + MoveLocalRefVarToRefLocals(*sym); + } + + if (!sym->IsPreg()) { + continue; + } + + if (ploc.reg0 == kRinvalid) { + /* load stack parameters to the vreg. */ + LoadStackArgsToVReg(*sym); + } else { + MoveArgsToVReg(ploc, *sym); + } + } + + aarchCGFunc->GetFirstBB()->InsertAtBeginning(*aarchCGFunc->GetDummyBB()); + aarchCGFunc->SetCurBB(*formerCurBB); +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_cg.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_cg.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5260c28227d463748033369a37fb42b12376cb5a --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_cg.cpp @@ -0,0 +1,290 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#include "riscv64_cg.h" +#include "riscv64_rt.h" +#include "mir_builder.h" +#include "becommon.h" + +namespace maplebe { +#include "riscv64_opnd.def" +#define DEFINE_MOP(...) {__VA_ARGS__}, +const AArch64MD AArch64CG::kMd[kMopLast] = { +#include "riscv64_md.def" +}; +#undef DEFINE_MOP + +std::array, kIntRegTypeNum> AArch64CG::intRegNames = { + std::array { + "err", "err0", "err1", "err2", "err3", "err4", "err5", "err6", "err7", "err8", "err9", "err10", + "err11", "err12", "err13", "err14", "err15", "err16", "err17", "err18", "err19", "err20", "err21", "err22", + "err23", "err24", "err25", "err26", "err27", "err28", "err", "err", "errsp", "errzr", /* x29 is fp */ + "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", "b8", "b9", "b10", "b11", + "b12", "b13", "b14", "b15", "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23", + "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31", "errMaxRegNum", "rflag" }, + std::array { + "err", "err0", "err1", "err2", "err3", "err4", "err5", "err6", "err7", "err8", "err9", "err10", + "err11", "err12", "err13", "err14", "err15", "err16", "err17", "err18", "err19", "err20", "err21", "err22", + "err23", "err24", "err25", "err26", "err27", "err28", "err29", "err30", "errsp", "errzr", /* x29 is fp */ + "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "h9", "h10", "h11", + "h12", "h13", "h14", "h15", "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23", + "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31", "errMaxRegNum", "rflag" }, + std::array { + "err", "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11", "w12", "w13", "w14", + "w15", "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23", "w24", "w25", "w26", "w27", "w28", "err", "err", + "wsp", "wzr", /* x29 is fp */ + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", + "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", + "errMaxRegNum", "rflag" }, + std::array { + "err", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", + "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x30", + "sp", "xzr", /* x29 is fp */ + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", + "errMaxRegNum", "rflag" }, + std::array { + "err", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", + "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x30", + "sp", "xzr", /* x29 is fp */ + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", + "errMaxRegNum", "rflag" } +}; + +bool AArch64CG::IsExclusiveFunc(MIRFunction &mirFunc) { + const std::string &funcName = mirFunc.GetName(); + for (const auto &it : ehExclusiveNameVec) { + if (it.compare(funcName) == 0) { + return true; + } + } + return false; +} +namespace wordsMap { + /* + * Generate object maps. + * + * 1. each class record its GCTIB in method meta (not read only meta) + * 2. GCTIB include: header protoType; n bitmap word; bitmap word + * 3. 
each reference word(4 or 8 bytes) is represented by 2 bits + * 00: not ref + * 01: normal ref + * 10: weak ref + * 11: unowned ref + * + * For example, if a scalar object has five ptr fields at offsets 24, 40(weak), + * 64(unowned), the generated code will be like: + * + * MCC_GCTIB__xxx: + * .long 0x40 // object has child reference + * .long 1 // one word in the bitmap + * .quad 0b110000100001000000 + * ... + */ + const uint32 kRefWordsPerMapWord = 32; /* contains bitmap for 32 ref words in 64 bits */ + const uint32 kLogRefWordsPerMapWord = 5; +#ifdef USE_32BIT_REF + const uint32 kReferenceWordSize = 4; + const uint32 kLog2ReferenceWordSize = 2; +#else + const uint32 kReferenceWordSize = 8; + const uint32 kLog2ReferenceWordSize = 3; +#endif + const uint32 kInMapWordOffsetMask = ((kReferenceWordSize * kRefWordsPerMapWord) - 1); + const uint32 kInMapWordIndexShift = (kLog2ReferenceWordSize - 1); + const uint32 kMapWordIndexShift = (kLog2ReferenceWordSize + kLogRefWordsPerMapWord); + + const uint64 kRefBits = 1; + const uint64 kWeakRefBits = 2; + const uint64 kUnownedRefBits = 3; + + /* + * Give a structrue type, calculate its bitmap_vector + */ + static void GetGCTIBBitMapWords(const BECommon &beCommon, MIRStructType &stType, std::vector &bitmapWords) { + bitmapWords.clear(); + if (stType.GetKind() == kTypeClass) { + uint64 curBitmap = 0; + uint32 curBitmapIndex = 0; + uint32 prevOffset = 0; + for (const auto &fieldInfo : beCommon.GetJClassLayout(static_cast(stType))) { + if (fieldInfo.IsRef()) { + uint32 curOffset = fieldInfo.GetOffset(); + /* skip meta field */ + if (curOffset == 0) { + continue; + } + CHECK_FATAL((curOffset > prevOffset) || (prevOffset == 0), "not ascending offset"); + uint32 wordIndex = curOffset >> kMapWordIndexShift; + if (wordIndex > curBitmapIndex) { + bitmapWords.emplace_back(curBitmap); + for (uint32 i = curBitmapIndex + 1; i < wordIndex; i++) { + bitmapWords.emplace_back(0); + } + curBitmap = 0; + curBitmapIndex = wordIndex; + } + uint32 bitOffset = (curOffset & kInMapWordOffsetMask) >> kInMapWordIndexShift; + if (CGOptions::IsGCOnly()) { + /* ignore unowned/weak when GCONLY is enabled. */ + curBitmap |= (kRefBits << bitOffset); + } else if (fieldInfo.IsUnowned()) { + curBitmap |= (kUnownedRefBits << bitOffset); + } else if (fieldInfo.IsWeak()) { + curBitmap |= (kWeakRefBits << bitOffset); + } else { + /* ref */ + curBitmap |= (kRefBits << bitOffset); + } + prevOffset = curOffset; + } + } + if (curBitmap != 0) { + bitmapWords.emplace_back(curBitmap); + } + } else if (stType.GetKind() != kTypeInterface) { + /* interface doesn't have reference fields */ + CHECK_FATAL(false, "GetGCTIBBitMapWords unexpected type"); + } + } +} +/* + * Find if there exist same GCTIB (both rcheader and bitmap are same) + * for different class. If ture reuse, if not emit and record new GCTIB. 
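+ *
+ * For example, if two classes produce the identical (rcHeader, bitmap words)
+ * pair, only the first emits GCTIB data; the second merely maps its GCTIB
+ * symbol name to the already-recorded pattern in symbolPatternMap.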
+ */ +void AArch64CG::FindOrCreateRepresentiveSym(std::vector &bitmapWords, uint32 rcHeader, + const std::string &name) { + GCTIBKey *key = memPool->New(allocator, rcHeader, bitmapWords); + const std::string &gcTIBName = GCTIB_PREFIX_STR + name; + MapleUnorderedMap::const_iterator iter = keyPatternMap.find(key); + if (iter == keyPatternMap.end() || gcTIBName.compare("MCC_GCTIB__Ljava_2Flang_2FObject_3B") == 0) { + /* Emit the GCTIB label for the class */ + GCTIBPattern *ptn = memPool->New(*key, *memPool); + + if (gcTIBName.compare("MCC_GCTIB__Ljava_2Flang_2FObject_3B") == 0) { + ptn->SetName("MCC_GCTIB__Ljava_2Flang_2FObject_3B"); + } + (void)keyPatternMap.insert(std::make_pair(key, ptn)); + (void)symbolPatternMap.insert(std::make_pair(gcTIBName, ptn)); + + /* Emit GCTIB pattern */ + std::string ptnString = "\t.type " + ptn->GetName() + ", %object\n" + "\t.data\n" + "\t.align 3\n"; + + MIRSymbol *gcTIBSymbol = GlobalTables::GetGsymTable().GetSymbolFromStrIdx( + GlobalTables::GetStrTable().GetStrIdxFromName(namemangler::GetInternalNameLiteral(gcTIBName))); + if (gcTIBSymbol != nullptr && gcTIBSymbol->GetStorageClass() == kScFstatic) { + ptnString += "\t.local "; + } else { + ptnString += "\t.global "; + } + + Emitter *emitter = GetEmitter(); + emitter->Emit(ptnString); + emitter->Emit(ptn->GetName()); + emitter->Emit("\n"); + + /* Emit the GCTIB pattern label for the class */ + emitter->Emit(ptn->GetName()); + emitter->Emit(":\n"); + + emitter->Emit("\t.long "); + emitter->EmitHexUnsigned(rcHeader); + emitter->Emit("\n"); + + /* generate n_bitmap word */ + emitter->Emit("\t.long "); /* AArch64-specific. Generate a 64-bit value. */ + emitter->EmitDecUnsigned(bitmapWords.size()); + emitter->Emit("\n"); + + /* Emit each bitmap word */ + for (const auto &bitmapWord : bitmapWords) { + if (!IsQuiet()) { + LogInfo::MapleLogger() << " bitmap_word: 0x"<< bitmapWord << " " << PRIx64 << "\n"; + } + emitter->Emit("\t.quad "); /* AArch64-specific. Generate a 64-bit value. */ + emitter->EmitHexUnsigned(bitmapWord); + emitter->Emit("\n"); + } + if (gcTIBSymbol != nullptr && gcTIBSymbol->GetStorageClass() != kScFstatic) { + /* add local symbol REF_XXX to every global GCTIB symbol */ + CreateRefSymForGlobalPtn(*ptn); + keyPatternMap[key] = ptn; + } + } else { + (void)symbolPatternMap.insert(make_pair(gcTIBName, iter->second)); + } +} + +/* + * Add local symbol REF_XXX to global GCTIB symbol, + * and replace the global GCTIBPattern in keyPatternMap. 
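+ *
+ * For a hypothetical pattern named MCC_GCTIB__Foo, this emits a local
+ * object REF_MCC_GCTIB__Foo whose single .quad entry points back at the
+ * global pattern, and the pattern is then renamed to that REF_ symbol.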
+ */ +void AArch64CG::CreateRefSymForGlobalPtn(GCTIBPattern &ptn) { + const std::string &refPtnString = REF_PREFIX_STR + ptn.GetName(); + const std::string &ptnString = "\t.type " + refPtnString + ", %object\n" + + "\t.data\n" + + "\t.align 3\n" + + "\t.local " + refPtnString + "\n" + + refPtnString + ":\n" + + "\t.quad " + ptn.GetName() + "\n"; + Emitter *emitter = GetEmitter(); + emitter->Emit(ptnString); + ptn.SetName(refPtnString); +} + +std::string AArch64CG::FindGCTIBPatternName(const std::string &name) const { + auto iter = symbolPatternMap.find(name); + if (iter == symbolPatternMap.end()) { + CHECK_FATAL(false, "No GCTIB pattern found for symbol: %s", name.c_str()); + } + return iter->second->GetName(); +} + +void AArch64CG::GenerateObjectMaps(BECommon &beCommon) { + if (!IsQuiet()) { + LogInfo::MapleLogger() << "DEBUG: Generating object maps...\n"; + } + + for (auto &tyId : GetMIRModule()->GetClassList()) { + if (!IsQuiet()) { + LogInfo::MapleLogger() << "Class tyIdx: " << tyId << "\n"; + } + TyIdx tyIdx(tyId); + MIRType *ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(tyIdx); + ASSERT(ty != nullptr, "ty nullptr check"); + /* Only emit GCTIB for classes owned by this module */ + ASSERT(ty->IsStructType(), "ty isn't MIRStructType* in AArch64CG::GenerateObjectMaps"); + MIRStructType *strTy = static_cast(ty); + if (!strTy->IsLocal()) { + continue; + } + + GStrIdx nameIdx = ty->GetNameStrIdx(); + + const std::string &name = GlobalTables::GetStrTable().GetStringFromStrIdx(nameIdx); + + /* Emit for a class */ + if (!IsQuiet()) { + LogInfo::MapleLogger() << " name: " << name << "\n"; + } + + std::vector bitmapWords; + wordsMap::GetGCTIBBitMapWords(beCommon, *strTy, bitmapWords); + /* fill specific header according to the size of bitmapWords */ + uint32 rcHeader = (!bitmapWords.empty()) ? 0x40 : 0; + FindOrCreateRepresentiveSym(bitmapWords, rcHeader, name); + } +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_cgfunc.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_cgfunc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..597c3a0ac101517bb9c639cadf3693cc5f003feb --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_cgfunc.cpp @@ -0,0 +1,7148 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#include "riscv64_cg.h" +#include "riscv64_cgfunc.h" +#include +#include +#include +#include "cfi.h" +#include "mpl_logging.h" +#include "riscv64_rt.h" +#include "opcode_info.h" +#include "mir_builder.h" +#include "mpl_atomic.h" +#include "metadata_layout.h" +#include "emit.h" + +namespace maplebe { +using namespace maple; +CondOperand AArch64CGFunc::ccOperands[kCcLast] = { +#define CONDCODE(a) CondOperand(CC_##a), +#include "riscv64_cc.def" +#undef CONDCODE +}; + +namespace { +constexpr int32 kSignedDimension = 2; /* signed and unsigned */ +constexpr int32 kIntByteSizeDimension = 4; /* 1 byte, 2 byte, 4 bytes, 8 bytes */ +constexpr int32 kFloatByteSizeDimension = 2; /* 4 bytes, 8 bytes */ +constexpr int32 kShiftAmount12 = 12; /* for instruction that can use shift, shift amount must be 0 or 12 */ + +MOperator ldIs[kSignedDimension][kIntByteSizeDimension] = { + /* unsigned == 0 */ + { MOP_wldrb, MOP_wldrh, MOP_wldr, MOP_xldr }, + /* signed == 1 */ + { MOP_wldrsb, MOP_wldrsh, MOP_wldr, MOP_xldr } +}; + +MOperator stIs[kSignedDimension][kIntByteSizeDimension] = { + /* unsigned == 0 */ + { MOP_wstrb, MOP_wstrh, MOP_wstr, MOP_xstr }, + /* signed == 1 */ + { MOP_wstrb, MOP_wstrh, MOP_wstr, MOP_xstr } +}; + +MOperator ldIsAcq[kSignedDimension][kIntByteSizeDimension] = { + /* unsigned == 0 */ + { MOP_wldarb, MOP_wldarh, MOP_wldar, MOP_xldar }, + /* signed == 1 */ + { MOP_undef, MOP_undef, MOP_wldar, MOP_xldar } +}; + +MOperator stIsRel[kSignedDimension][kIntByteSizeDimension] = { + /* unsigned == 0 */ + { MOP_wstlrb, MOP_wstlrh, MOP_wstlr, MOP_xstlr }, + /* signed == 1 */ + { MOP_wstlrb, MOP_wstlrh, MOP_wstlr, MOP_xstlr } +}; + +MOperator ldFs[kFloatByteSizeDimension] = { MOP_sldr, MOP_dldr }; +MOperator stFs[kFloatByteSizeDimension] = { MOP_sstr, MOP_dstr }; + +MOperator ldFsAcq[kFloatByteSizeDimension] = { MOP_undef, MOP_undef }; +MOperator stFsRel[kFloatByteSizeDimension] = { MOP_undef, MOP_undef }; + +MOperator PickLdStInsn(bool isLoad, uint32 bitSize, PrimType primType, AArch64isa::MemoryOrdering memOrd) { + ASSERT(__builtin_popcount(static_cast(memOrd)) <= 1, "must be kMoNone or kMoAcquire"); + ASSERT(primType != PTY_ptr, "should have been lowered"); + ASSERT(primType != PTY_ref, "should have been lowered"); + ASSERT(bitSize >= k8BitSize, "PTY_u1 should have been lowered?"); + ASSERT(__builtin_popcount(bitSize) == 1, "PTY_u1 should have been lowered?"); + if (isLoad) { + ASSERT((memOrd == AArch64isa::kMoNone) || (memOrd == AArch64isa::kMoAcquire) || + (memOrd == AArch64isa::kMoAcquireRcpc) || (memOrd == AArch64isa::kMoLoacquire), "unknown Memory Order"); + } else { + ASSERT((memOrd == AArch64isa::kMoNone) || (memOrd == AArch64isa::kMoRelease) || + (memOrd == AArch64isa::kMoLorelease), "unknown Memory Order"); + } + + /* __builtin_ffs(x) returns: 0 -> 0, 1 -> 1, 2 -> 2, 4 -> 3, 8 -> 4 */ + if (IsPrimitiveInteger(primType)) { + MOperator(*table)[kIntByteSizeDimension]; + if (isLoad) { + table = (memOrd == AArch64isa::kMoAcquire) ? ldIsAcq : ldIs; + } else { + table = (memOrd == AArch64isa::kMoRelease) ? stIsRel : stIs; + } + + int32 signedUnsigned = IsUnsignedInteger(primType) ? 0 : 1; + /* __builtin_ffs(x) returns: 8 -> 4, 16 -> 5, 32 -> 6, 64 -> 7 */ + uint32 size = static_cast(__builtin_ffs(static_cast(bitSize))) - 4; + ASSERT(size <= 3, "wrong bitSize"); + return table[signedUnsigned][size]; + } else { + MOperator *table = nullptr; + if (isLoad) { + table = (memOrd == AArch64isa::kMoAcquire) ? ldFsAcq : ldFs; + } else { + table = (memOrd == AArch64isa::kMoRelease) ? 
stFsRel : stFs; + } + + /* __builtin_ffs(x) returns: 32 -> 6, 64 -> 7 */ + uint32 size = static_cast(__builtin_ffs(static_cast(bitSize))) - 6; + ASSERT(size <= 1, "size must be 0 or 1"); + return table[size]; + } +} +} + +MOperator AArch64CGFunc::PickLdInsn(uint32 bitSize, PrimType primType, AArch64isa::MemoryOrdering memOrd) { + return PickLdStInsn(true, bitSize, primType, memOrd); +} + +MOperator AArch64CGFunc::PickStInsn(uint32 bitSize, PrimType primType, AArch64isa::MemoryOrdering memOrd) { + return PickLdStInsn(false, bitSize, primType, memOrd); +} + +MOperator AArch64CGFunc::PickMovInsn(PrimType primType) { + switch (primType) { + case PTY_u8: + case PTY_u16: + case PTY_u32: + case PTY_i8: + case PTY_i16: + case PTY_i32: + return MOP_wmovrr; + case PTY_a32: + ASSERT(false, "Invalid primitive type for AArch64"); + return MOP_undef; + case PTY_ptr: + case PTY_ref: + ASSERT(false, "PTY_ref and PTY_ptr should have been lowered"); + return MOP_undef; + case PTY_a64: + case PTY_u64: + case PTY_i64: + return MOP_xmovrr; + case PTY_f32: + return MOP_xvmovs; + case PTY_f64: + return MOP_xvmovd; + default: + ASSERT(false, "NYI PickMovInsn"); + return MOP_undef; + } +} + +MOperator AArch64CGFunc::PickMovInsn(RegOperand &lhs, RegOperand &rhs) { + CHECK_FATAL(lhs.GetRegisterType() == rhs.GetRegisterType(), "PickMovInsn: unequal kind NYI"); + CHECK_FATAL(lhs.GetSize() == rhs.GetSize(), "PickMovInsn: unequal size NYI"); + ASSERT(((lhs.GetSize() < k64BitSize) || (lhs.GetRegisterType() == kRegTyFloat)), + "should split the 64 bits or more mov"); + if (lhs.GetRegisterType() == kRegTyInt) { + return MOP_wmovrr; + } + if (lhs.GetRegisterType() == kRegTyFloat) { + return (lhs.GetSize() <= k32BitSize) ? MOP_xvmovs : MOP_xvmovd; + } + ASSERT(false, "PickMovInsn: kind NYI"); + return MOP_undef; +} + +MOperator AArch64CGFunc::PickMovInsn(uint32 bitLen, RegType regType) { + ASSERT((bitLen == k32BitSize) || (bitLen == k64BitSize), "size check"); + ASSERT((regType == kRegTyInt) || (regType == kRegTyFloat), "type check"); + if (regType == kRegTyInt) { + return (bitLen == k32BitSize) ? MOP_wmovrr : MOP_xmovrr; + } + return (bitLen == k32BitSize) ? MOP_xvmovs : MOP_xvmovd; +} + +void AArch64CGFunc::SelectLoadAcquire(Operand &dest, PrimType dtype, Operand &src, PrimType stype, + AArch64isa::MemoryOrdering memOrd, bool isDirect) { + ASSERT(src.GetKind() == Operand::kOpdMem, "Just checking"); + ASSERT(memOrd != AArch64isa::kMoNone, "Just checking"); + + uint32 ssize = isDirect ? src.GetSize() : GetPrimTypeBitSize(dtype); + uint32 dsize = GetPrimTypeBitSize(dtype); + MOperator mOp = PickLdInsn(ssize, stype, memOrd); + + Operand *newSrc = &src; + auto &memOpnd = static_cast(src); + AArch64OfstOperand *immOpnd = memOpnd.GetOffsetImmediate(); + int32 offset = immOpnd->GetOffsetValue(); + RegOperand *origBaseReg = memOpnd.GetBaseRegister(); + if (offset != 0) { + RegOperand &resOpnd = CreateRegisterOperandOfType(PTY_i64); + ASSERT(origBaseReg != nullptr, "nullptr check"); + SelectAdd(resOpnd, *origBaseReg, *immOpnd, PTY_i64); + newSrc = &CreateReplacementMemOperand(ssize, resOpnd, 0); + } + + std::string key; + if (isDirect && GetCG()->GenerateVerboseCG()) { + const MIRSymbol *sym = static_cast(&src)->GetSymbol(); + if (sym != nullptr) { + MIRStorageClass sc = sym->GetStorageClass(); + if (sc == kScFormal) { + key = "param: "; + } else if (sc == kScAuto) { + key = "local var: "; + } else { + key = "global: "; + } + key.append(sym->GetName()); + } + } + + /* Check if the right load-acquire instruction is available. 
*/ + if (mOp != MOP_undef) { + Insn &insn = GetCG()->BuildInstruction(mOp, dest, *newSrc); + if (isDirect && GetCG()->GenerateVerboseCG()) { + insn.SetComment(key); + } + GetCurBB()->AppendInsn(insn); + } else { + if (IsPrimitiveFloat(stype)) { + /* Uses signed integer version ldar followed by a floating-point move(fmov). */ + ASSERT(stype == dtype, "Just checking"); + PrimType itype = (stype == PTY_f32) ? PTY_i32 : PTY_i64; + RegOperand ®Opnd = CreateRegisterOperandOfType(itype); + Insn &insn = GetCG()->BuildInstruction(PickLdInsn(ssize, itype, memOrd), regOpnd, *newSrc); + if (isDirect && GetCG()->GenerateVerboseCG()) { + insn.SetComment(key); + } + GetCurBB()->AppendInsn(insn); + mOp = (stype == PTY_f32) ? MOP_xvmovsr : MOP_xvmovdr; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, dest, regOpnd)); + } else { + /* Use unsigned version ldarb/ldarh followed by a sign-extension instruction(sxtb/sxth). */ + ASSERT((ssize == k8BitSize) || (ssize == k16BitSize), "Just checking"); + PrimType utype = (ssize == k8BitSize) ? PTY_u8 : PTY_u16; + Insn &insn = GetCG()->BuildInstruction(PickLdInsn(ssize, utype, memOrd), dest, *newSrc); + if (isDirect && GetCG()->GenerateVerboseCG()) { + insn.SetComment(key); + } + GetCurBB()->AppendInsn(insn); + mOp = ((dsize == k32BitSize) ? ((ssize == k8BitSize) ? MOP_xsxtb32 : MOP_xsxth32) + : ((ssize == k8BitSize) ? MOP_xsxtb64 : MOP_xsxth64)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, dest, dest)); + } + } +} + +void AArch64CGFunc::SelectStoreRelease(Operand &dest, PrimType dtype, Operand &src, PrimType stype, + AArch64isa::MemoryOrdering memOrd, bool isDirect) { + ASSERT(dest.GetKind() == Operand::kOpdMem, "Just checking"); + + uint32 dsize = isDirect ? dest.GetSize() : GetPrimTypeBitSize(stype); + MOperator mOp = PickStInsn(dsize, stype, memOrd); + + Operand *newDest = &dest; + AArch64MemOperand *memOpnd = static_cast(&dest); + AArch64OfstOperand *immOpnd = memOpnd->GetOffsetImmediate(); + int32 offset = immOpnd->GetOffsetValue(); + RegOperand *origBaseReg = memOpnd->GetBaseRegister(); + if (offset != 0) { + RegOperand &resOpnd = CreateRegisterOperandOfType(PTY_i64); + ASSERT(origBaseReg != nullptr, "nullptr check"); + SelectAdd(resOpnd, *origBaseReg, *immOpnd, PTY_i64); + newDest = &CreateReplacementMemOperand(dsize, resOpnd, 0); + } + + std::string key; + if (isDirect && GetCG()->GenerateVerboseCG()) { + const MIRSymbol *sym = static_cast(&dest)->GetSymbol(); + if (sym != nullptr) { + MIRStorageClass sc = sym->GetStorageClass(); + if (sc == kScFormal) { + key = "param: "; + } else if (sc == kScAuto) { + key = "local var: "; + } else { + key = "global: "; + } + key.append(sym->GetName()); + } + } + + /* Check if the right store-release instruction is available. */ + if (mOp != MOP_undef) { + Insn &insn = GetCG()->BuildInstruction(mOp, src, *newDest); + if (isDirect && GetCG()->GenerateVerboseCG()) { + insn.SetComment(key); + } + GetCurBB()->AppendInsn(insn); + } else { + /* Use a floating-point move(fmov) followed by a stlr. */ + ASSERT(IsPrimitiveFloat(stype), "must be float type"); + CHECK_FATAL(stype == dtype, "Just checking"); + PrimType itype = (stype == PTY_f32) ? PTY_i32 : PTY_i64; + RegOperand ®Opnd = CreateRegisterOperandOfType(itype); + mOp = (stype == PTY_f32) ? 
MOP_xvmovrs : MOP_xvmovrd; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, regOpnd, src)); + Insn &insn = GetCG()->BuildInstruction(PickStInsn(dsize, itype, memOrd), regOpnd, *newDest); + if (isDirect && GetCG()->GenerateVerboseCG()) { + insn.SetComment(key); + } + GetCurBB()->AppendInsn(insn); + } +} + +void AArch64CGFunc::SelectCopyImm(Operand &dest, ImmOperand &src, PrimType dtype) { + uint32 dsize = GetPrimTypeBitSize(dtype); + ASSERT(IsPrimitiveInteger(dtype), "The type of destination operand must be Integer"); + ASSERT(((dsize == k8BitSize) || (dsize == k16BitSize) || (dsize == k32BitSize) || (dsize == k64BitSize)), + "The destination operand must be >= 8-bit"); + if (src.IsSingleInstructionMovable()) { + MOperator mOp = (dsize == k32BitSize) ? MOP_xmovri32 : MOP_xmovri64; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, dest, src)); + return; + } + uint64 srcVal = static_cast(src.GetValue()); + /* using mov/movk to load the immediate value */ + if (dsize == k8BitSize) { + /* compute lower 8 bits value */ + if (dtype == PTY_u8) { + /* zero extend */ + srcVal = (srcVal << 56) >> 56; + dtype = PTY_u16; + } else { + /* sign extend */ + srcVal = ((static_cast(srcVal)) << 56) >> 56; + dtype = PTY_i16; + } + dsize = k16BitSize; + } + if (dsize == k16BitSize) { + if (dtype == PTY_u16) { + /* check lower 16 bits and higher 16 bits respectively */ + ASSERT((srcVal & 0x0000FFFFULL) != 0, "unexpected value"); + ASSERT(((srcVal >> k16BitSize) & 0x0000FFFFULL) == 0, "unexpected value"); + ASSERT((srcVal & 0x0000FFFFULL) != 0xFFFFULL, "unexpected value"); + /* create an imm opereand which represents lower 16 bits of the immediate */ + ImmOperand &srcLower = CreateImmOperand((srcVal & 0x0000FFFFULL), k16BitSize, false); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xmovri32, dest, srcLower)); + return; + } else { + /* sign extend and let `dsize == 32` case take care of it */ + srcVal = ((static_cast(srcVal)) << 48) >> 48; + dsize = k32BitSize; + } + } + if (dsize == k32BitSize) { + /* check lower 16 bits and higher 16 bits respectively */ + ASSERT((srcVal & 0x0000FFFFULL) != 0, "unexpected val"); + ASSERT(((srcVal >> k16BitSize) & 0x0000FFFFULL) != 0, "unexpected val"); + ASSERT((srcVal & 0x0000FFFFULL) != 0xFFFFULL, "unexpected val"); + ASSERT(((srcVal >> k16BitSize) & 0x0000FFFFULL) != 0xFFFFULL, "unexpected val"); + /* create an imm opereand which represents lower 16 bits of the immediate */ + ImmOperand &srcLower = CreateImmOperand((srcVal & 0x0000FFFFULL), k16BitSize, false); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xmovri32, dest, srcLower)); + /* create an imm opereand which represents upper 16 bits of the immediate */ + ImmOperand &srcUpper = CreateImmOperand(((srcVal >> k16BitSize) & 0x0000FFFFULL), k16BitSize, false); + LogicalShiftLeftOperand *lslOpnd = GetLogicalShiftLeftOperand(k16BitSize, false); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_wmovkri16, dest, srcUpper, *lslOpnd)); + } else { + /* + * partition it into 4 16-bit chunks + * if more 0's than 0xFFFF's, use movz as the initial instruction. + * otherwise, movn. 
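+ *
+ * For example, loading 0x0000123400005678 emits (x0 used here as an
+ * example destination):
+ *   movz x0, #0x5678            // lowest 16-bit chunk
+ *   movk x0, #0x1234, lsl #32   // third chunk; all-zero chunks are skipped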
+ */ + bool useMovz = BetterUseMOVZ(srcVal); + bool useMovk = false; + /* get lower 32 bits of the immediate */ + uint64 chunkLval = srcVal & 0xFFFFFFFFULL; + /* get upper 32 bits of the immediate */ + uint64 chunkHval = (srcVal >> k32BitSize) & 0xFFFFFFFFULL; + int32 maxLoopTime = 4; + + if (chunkLval == chunkHval) { + /* compute lower 32 bits, and then copy to higher 32 bits, so only 2 chunks need be processed */ + maxLoopTime = 2; + } + + uint64 sa = 0; + + for (int64 i = 0; i < maxLoopTime; ++i, sa += k16BitSize) { + /* create an imm opereand which represents the i-th 16-bit chunk of the immediate */ + uint64 chunkVal = (srcVal >> (static_cast(sa))) & 0x0000FFFFULL; + if (useMovz ? (chunkVal == 0) : (chunkVal == 0x0000FFFFULL)) { + continue; + } + ImmOperand &src16 = CreateImmOperand(chunkVal, k16BitSize, false); + LogicalShiftLeftOperand *lslOpnd = GetLogicalShiftLeftOperand(sa, true); + if (!useMovk) { + /* use movz or movn */ + if (!useMovz) { + src16.BitwiseNegate(); + } + GetCurBB()->AppendInsn( + GetCG()->BuildInstruction(useMovz ? MOP_xmovzri16 : MOP_xmovnri16, dest, src16, *lslOpnd)); + useMovk = true; + } else { + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xmovkri16, dest, src16, *lslOpnd)); + } + } + + if (maxLoopTime == 2) { + /* copy lower 32 bits to higher 32 bits */ + AArch64ImmOperand &immOpnd = CreateImmOperand(k32BitSize, k8BitSize, false); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MPO_xbfirri6i6, dest, dest, immOpnd, immOpnd)); + } + } +} + +void AArch64CGFunc::SelectCopyMemOpnd(Operand &dest, PrimType dtype, uint32 dsize, + Operand &src, PrimType stype) { + AArch64isa::MemoryOrdering memOrd = AArch64isa::kMoNone; + const MIRSymbol *sym = static_cast(&src)->GetSymbol(); + if ((sym != nullptr) && (sym->GetStorageClass() == kScGlobal) && sym->GetAttr(ATTR_memory_order_acquire)) { + memOrd = AArch64isa::kMoAcquire; + } + + if (memOrd != AArch64isa::kMoNone) { + AArch64CGFunc::SelectLoadAcquire(dest, dtype, src, stype, memOrd, true); + return; + } + Insn *insn = nullptr; + uint32 ssize = src.GetSize(); + if (IsPrimitiveFloat(stype)) { + CHECK_FATAL(dsize == ssize, "dsize %u expect equals ssize %u", dtype, ssize); + insn = &GetCG()->BuildInstruction(PickLdInsn(ssize, stype), dest, src); + } else { + insn = &GetCG()->BuildInstruction(PickLdInsn(ssize, stype), dest, src); + } + + if (GetCG()->GenerateVerboseCG()) { + const MIRSymbol *symSecond = static_cast(&src)->GetSymbol(); + if (symSecond != nullptr) { + std::string key; + MIRStorageClass sc = symSecond->GetStorageClass(); + if (sc == kScFormal) { + key = "param: "; + } else if (sc == kScAuto) { + key = "local var: "; + } else { + key = "global: "; + } + insn->SetComment(key.append(symSecond->GetName())); + } + } + + GetCurBB()->AppendInsn(*insn); +} + +bool AArch64CGFunc::IsImmediateValueInRange(MOperator mOp, int64 immVal, bool is64Bits, + bool isIntactIndexed, bool isPostIndexed, bool isPreIndexed) const { + bool isInRange = false; + switch (mOp) { + case MOP_xstr: + case MOP_wstr: + isInRange = + (isIntactIndexed && + ((!is64Bits && (immVal >= kStrAllLdrAllImmLowerBound) && (immVal <= kStrLdrImm32UpperBound)) || + (is64Bits && (immVal >= kStrAllLdrAllImmLowerBound) && (immVal <= kStrLdrImm64UpperBound)))) || + ((isPostIndexed || isPreIndexed) && (immVal >= kStrLdrPerPostLowerBound) && + (immVal <= kStrLdrPerPostUpperBound)); + break; + case MOP_wstrb: + isInRange = + (isIntactIndexed && (immVal >= kStrAllLdrAllImmLowerBound) && (immVal <= kStrbLdrbImmUpperBound)) || + ((isPostIndexed || 
isPreIndexed) && (immVal >= kStrLdrPerPostLowerBound) && + (immVal <= kStrLdrPerPostUpperBound)); + break; + case MOP_wstrh: + isInRange = + (isIntactIndexed && (immVal >= kStrAllLdrAllImmLowerBound) && (immVal <= kStrhLdrhImmUpperBound)) || + ((isPostIndexed || isPreIndexed) && (immVal >= kStrLdrPerPostLowerBound) && + (immVal <= kStrLdrPerPostUpperBound)); + break; + default: + break; + } + return isInRange; +} + +bool AArch64CGFunc::IsStoreMop(MOperator mOp) const { + switch (mOp) { + case MOP_xstr: + case MOP_wstr: + case MOP_wstrb: + case MOP_wstrh: + return true; + default: + return false; + } +} + +void AArch64CGFunc::SplitMovImmOpndInstruction(int64 immVal, RegOperand &destReg) { + bool useMovz = BetterUseMOVZ(immVal); + bool useMovk = false; + /* get lower 32 bits of the immediate */ + uint64 chunkLval = static_cast(immVal) & 0xFFFFFFFFULL; + /* get upper 32 bits of the immediate */ + uint64 chunkHval = (static_cast(immVal) >> k32BitSize) & 0xFFFFFFFFULL; + int32 maxLoopTime = 4; + + if (chunkLval == chunkHval) { + /* compute lower 32 bits, and then copy to higher 32 bits, so only 2 chunks need be processed */ + maxLoopTime = 2; + } + + uint64 sa = 0; + for (int64 i = 0 ; i < maxLoopTime; ++i, sa += k16BitSize) { + /* create an imm opereand which represents the i-th 16-bit chunk of the immediate */ + uint64 chunkVal = (static_cast(immVal) >> sa) & 0x0000FFFFULL; + if (useMovz ? (chunkVal == 0) : (chunkVal == 0x0000FFFFULL)) { + continue; + } + ImmOperand &src16 = CreateImmOperand(chunkVal, k16BitSize, false); + LogicalShiftLeftOperand *lslOpnd = GetLogicalShiftLeftOperand(sa, true); + if (!useMovk) { + /* use movz or movn */ + if (!useMovz) { + src16.BitwiseNegate(); + } + MOperator mOpCode = useMovz ? MOP_xmovzri16 : MOP_xmovnri16; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOpCode, destReg, src16, *lslOpnd)); + useMovk = true; + } else { + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xmovkri16, destReg, src16, *lslOpnd)); + } + } + + if (maxLoopTime == 2) { + /* copy lower 32 bits to higher 32 bits */ + AArch64ImmOperand &immOpnd = CreateImmOperand(k32BitSize, k8BitSize, false); + Insn &insn = GetCG()->BuildInstruction(MPO_xbfirri6i6, destReg, destReg, immOpnd, immOpnd); + GetCurBB()->AppendInsn(insn); + } +} + +void AArch64CGFunc::SelectCopyRegOpnd(Operand &dest, PrimType dtype, Operand::OperandType opndType, + uint32 dsize, Operand &src, PrimType stype) { + if (opndType != Operand::kOpdMem) { + ASSERT(stype != PTY_a32, ""); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(PickMovInsn(stype), dest, src)); + return; + } + AArch64isa::MemoryOrdering memOrd = AArch64isa::kMoNone; + const MIRSymbol *sym = static_cast(&dest)->GetSymbol(); + if ((sym != nullptr) && (sym->GetStorageClass() == kScGlobal) && sym->GetAttr(ATTR_memory_order_release)) { + memOrd = AArch64isa::kMoRelease; + } + + if (memOrd != AArch64isa::kMoNone) { + AArch64CGFunc::SelectStoreRelease(dest, dtype, src, stype, memOrd, true); + return; + } + + bool is64Bits = (dest.GetSize() == k64BitSize) ? 
true : false; + MOperator strMop = PickStInsn(dsize, stype); + if (!dest.IsMemoryAccessOperand()) { + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(strMop, src, dest)); + return; + } + + AArch64MemOperand *memOpnd = static_cast(&dest); + ASSERT(memOpnd != nullptr, "memOpnd should not be nullptr"); + if (memOpnd->GetAddrMode() == AArch64MemOperand::kAddrModeLo12Li) { + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(strMop, src, dest)); + return; + } + if (memOpnd->GetOffsetOperand() == nullptr) { + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(strMop, src, dest)); + return; + } + ImmOperand *immOpnd = static_cast(memOpnd->GetOffsetOperand()); + ASSERT(immOpnd != nullptr, "immOpnd should not be nullptr"); + int64 immVal = immOpnd->GetValue(); + bool isIntactIndexed = memOpnd->IsIntactIndexed(); + bool isPostIndexed = memOpnd->IsPostIndexed(); + bool isPreIndexed = memOpnd->IsPreIndexed(); + bool isInRange = IsImmediateValueInRange(strMop, immVal, is64Bits, isIntactIndexed, isPostIndexed, isPreIndexed); + bool isMopStr = IsStoreMop(strMop); + if (isInRange || !isMopStr) { + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(strMop, src, dest)); + return; + } + ASSERT(memOpnd->GetBaseRegister() != nullptr, "nullptr check"); + if (isIntactIndexed) { + RegOperand ® = CreateRegisterOperandOfType(PTY_i64); + AArch64ImmOperand *aarch64ImmOpnd = static_cast(immOpnd); + if (aarch64ImmOpnd->IsSingleInstructionMovable()) { + MOperator mOp = MOP_xmovri64; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, reg, *immOpnd)); + } else { + SplitMovImmOpndInstruction(immVal, reg); + } + MemOperand &newDest = GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, GetPrimTypeBitSize(dtype), + memOpnd->GetBaseRegister(), ®, nullptr, nullptr); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(strMop, src, newDest)); + } else if (isPostIndexed || isPreIndexed) { + RegOperand ® = CreateRegisterOperandOfType(PTY_i64); + MOperator mopMov = MOP_xmovri64; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mopMov, reg, *immOpnd)); + MOperator mopAdd = MOP_xaddrrr; + MemOperand &newDest = + GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, GetPrimTypeBitSize(dtype), memOpnd->GetBaseRegister(), + nullptr, &GetOrCreateOfstOpnd(0, k32BitSize), nullptr); + Insn &insn1 = GetCG()->BuildInstruction(strMop, src, newDest); + Insn &insn2 = GetCG()->BuildInstruction(mopAdd, *newDest.GetBaseRegister(), + *newDest.GetBaseRegister(), reg); + if (isPostIndexed) { + GetCurBB()->AppendInsn(insn1); + GetCurBB()->AppendInsn(insn2); + } else { + /* isPreIndexed */ + GetCurBB()->AppendInsn(insn2); + GetCurBB()->AppendInsn(insn1); + } + } +} + +void AArch64CGFunc::SelectCopy(Operand &dest, PrimType dtype, Operand &src, PrimType stype) { + ASSERT(dest.IsRegister() || dest.IsMemoryAccessOperand(), ""); + uint32 dsize = GetPrimTypeBitSize(dtype); + if (dest.IsRegister()) { + dsize = dest.GetSize(); + } + Operand::OperandType opnd0Type = dest.GetKind(); + Operand::OperandType opnd1Type = src.GetKind(); + ASSERT(((dsize >= src.GetSize()) || (opnd0Type == Operand::kOpdMem)), "NYI"); + ASSERT(((opnd0Type == Operand::kOpdRegister) || (src.GetKind() == Operand::kOpdRegister)), + "either src or dest should be register"); + + switch (opnd1Type) { + case Operand::kOpdMem: + SelectCopyMemOpnd(dest, dtype, dsize, src, stype); + break; + case Operand::kOpdOffset: + case Operand::kOpdImmediate: + SelectCopyImm(dest, static_cast(src), stype); + break; + case Operand::kOpdFPZeroImmediate: + GetCurBB()->AppendInsn(GetCG()->BuildInstruction((dsize == 
k32BitSize) ? MOP_xvmovsr : MOP_xvmovdr, + dest, AArch64RegOperand::GetZeroRegister(dsize))); + break; + case Operand::kOpdRegister: + SelectCopyRegOpnd(dest, dtype, opnd0Type, dsize, src, stype); + break; + default: + CHECK_FATAL(false, "NYI"); + } +} + +/* This function copies src to a register, the src can be an imm, mem or a label */ +RegOperand &AArch64CGFunc::SelectCopy(Operand &src, PrimType stype, PrimType dtype) { + RegOperand &dest = CreateRegisterOperandOfType(dtype); + SelectCopy(dest, dtype, src, stype); + return dest; +} + +/* + * We need to adjust the offset of a stack allocated local variable + * if we store FP/SP before any other local variables to save an instruction. + * See AArch64CGFunc::OffsetAdjustmentForFPLR() in aarch64_cgfunc.cpp + * + * That is when we !UsedStpSubPairForCallFrameAllocation(). + * + * Because we need to use the STP/SUB instruction pair to store FP/SP 'after' + * local variables when the call frame size is greater that the max offset + * value allowed for the STP instruction (we cannot use STP w/ prefix, LDP w/ + * postfix), if UsedStpSubPairForCallFrameAllocation(), we don't need to + * adjust the offsets. + */ +bool AArch64CGFunc::IsImmediateOffsetOutOfRange(AArch64MemOperand &memOpnd, uint32 bitLen) { + ASSERT(bitLen >= k8BitSize, "bitlen error"); + ASSERT(bitLen <= k64BitSize, "bitlen error"); + ASSERT((bitLen & (bitLen - 1)) == 0, "bitlen error"); + AArch64MemOperand::AArch64AddressingMode mode = memOpnd.GetAddrMode(); + if ((mode == AArch64MemOperand::kAddrModeBOi) && memOpnd.IsIntactIndexed()) { + int32 offsetValue = memOpnd.GetOffsetImmediate()->GetOffsetValue(); + if (memOpnd.GetOffsetImmediate()->GetVary() == kUnAdjustVary) { + offsetValue += static_cast(GetMemlayout())->RealStackFrameSize() + 0xff; + } + offsetValue += 2 * kIntregBytelen; /* Refer to the above comment */ + return AArch64MemOperand::IsPIMMOffsetOutOfRange(offsetValue, bitLen); + } else { + return false; + } +} + +AArch64MemOperand &AArch64CGFunc::CreateReplacementMemOperand(uint32 bitLen, + RegOperand &baseReg, int32 offset) { + return static_cast(CreateMemOpnd(baseReg, offset, bitLen)); +} + +bool AArch64CGFunc::CheckIfSplitOffsetWithAdd(const AArch64MemOperand &memOpnd, uint32 bitLen) { + if (memOpnd.GetAddrMode() != AArch64MemOperand::kAddrModeBOi || !memOpnd.IsIntactIndexed()) { + return false; + } + AArch64OfstOperand *ofstOpnd = memOpnd.GetOffsetImmediate(); + int32 opndVal = ofstOpnd->GetOffsetValue(); + int32 maxPimm = memOpnd.GetMaxPIMM(bitLen); + int32 q0 = opndVal / maxPimm; + int32 addend = q0 * maxPimm; + int32 r0 = opndVal - addend; + int32 alignment = memOpnd.GetImmediateOffsetAlignment(bitLen); + int32 r1 = static_cast(r0) & ((1u << static_cast(alignment)) - 1); + addend = addend + r1; + return (addend > 0); +} + +AArch64MemOperand &AArch64CGFunc::SplitOffsetWithAddInstruction(const AArch64MemOperand &memOpnd, uint32 bitLen, + AArch64reg baseRegNum, bool isDest, Insn *insn) { + ASSERT((memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi), "expect kAddrModeBOi memOpnd"); + ASSERT(memOpnd.IsIntactIndexed(), "expect intactIndexed memOpnd"); + AArch64OfstOperand *ofstOpnd = memOpnd.GetOffsetImmediate(); + int32 opndVal = ofstOpnd->GetOffsetValue(); + + /* + * opndVal == Q0 * 32760(16380) + R0 + * R0 == Q1 * 8(4) + R1 + * ADDEND == Q0 * 32760(16380) + R1 + * NEW_OFFSET = Q1 * 8(4) + * we want to generate two instructions: + * ADD TEMP_REG, X29, ADDEND + * LDR/STR TEMP_REG, [ TEMP_REG, #NEW_OFFSET ] + */ + int32 maxPimm = memOpnd.GetMaxPIMM(bitLen); + int32 
q0 = opndVal / maxPimm; + int32 addend = q0 * maxPimm; + int32 r0 = opndVal - addend; + int32 alignment = memOpnd.GetImmediateOffsetAlignment(bitLen); + int32 q1 = static_cast(r0) >> static_cast(alignment); + int32 r1 = static_cast(r0) & ((1u << static_cast(alignment)) - 1); + addend = addend + r1; + RegOperand *origBaseReg = memOpnd.GetBaseRegister(); + ASSERT(origBaseReg != nullptr, "nullptr check"); + if (addend > 0) { + int32 t = addend; + constexpr uint32 suffixClear = 0xfffff000; + addend = (static_cast(addend) & suffixClear); + q1 = (static_cast(q1) << static_cast(alignment)) + (t - addend); + if (AArch64MemOperand::IsPIMMOffsetOutOfRange(q1, bitLen)) { + addend = (static_cast(opndVal) & suffixClear); + q1 = opndVal - addend; + } + ImmOperand &immAddend = CreateImmOperand(addend, k64BitSize, true); + RegOperand &resOpnd = (baseRegNum == AArch64reg::kRinvalid) + ? CreateRegisterOperandOfType(PTY_i64) + : GetOrCreatePhysicalRegisterOperand(baseRegNum, kSizeOfPtr * kBitsPerByte, kRegTyInt); + if (insn == nullptr) { + SelectAdd(resOpnd, *origBaseReg, immAddend, PTY_i64); + } else { + SelectAddAfterInsn(resOpnd, *origBaseReg, immAddend, PTY_i64, isDest, *insn); + } + AArch64MemOperand &newMemOpnd = CreateReplacementMemOperand(bitLen, resOpnd, q1); + newMemOpnd.SetStackMem(memOpnd.IsStackMem()); + return newMemOpnd; + } else { + AArch64MemOperand &newMemOpnd = CreateReplacementMemOperand( + bitLen, *origBaseReg, (static_cast(q1) << static_cast(alignment))); + newMemOpnd.SetStackMem(memOpnd.IsStackMem()); + return newMemOpnd; + } +} + +void AArch64CGFunc::SelectDassign(DassignNode &stmt, Operand &opnd0) { + SelectDassign(stmt.GetStIdx(), stmt.GetFieldID(), stmt.GetRHS()->GetPrimType(), opnd0); +} + +/* + * Used for SelectDassign when do optimization for volatile store, because the stlr instruction only allow + * store to the memory addrress with the register base offset 0. + * STLR , [{,#0}], 32-bit variant (size = 10) + * STLR , [{,#0}], 64-bit variant (size = 11) + * So the function do the prehandle of the memory operand to satisify the Store-Release.. + */ +RegOperand *AArch64CGFunc::ExtractNewMemBase(MemOperand &memOpnd) { + const MIRSymbol *sym = memOpnd.GetSymbol(); + AArch64MemOperand::AArch64AddressingMode mode = static_cast(&memOpnd)->GetAddrMode(); + if (mode == AArch64MemOperand::kAddrModeLiteral) { + return nullptr; + } + RegOperand *baseOpnd = memOpnd.GetBaseRegister(); + ASSERT(baseOpnd != nullptr, "nullptr check"); + RegOperand &resultOpnd = CreateRegisterOperandOfType(baseOpnd->GetRegisterType(), baseOpnd->GetSize() / kBitsPerByte); + bool is64Bits = (baseOpnd->GetSize() == k64BitSize); + if (mode == AArch64MemOperand::kAddrModeLo12Li) { + StImmOperand &stImm = CreateStImmOperand(*sym, 0, 0); + Insn &addInsn = GetCG()->BuildInstruction(MOP_xadrpl12, resultOpnd, *baseOpnd, stImm); + addInsn.SetComment("new add insn"); + GetCurBB()->AppendInsn(addInsn); + } else if (mode == AArch64MemOperand::kAddrModeBOi) { + AArch64OfstOperand *offsetOpnd = static_cast(&memOpnd)->GetOffsetImmediate(); + if (offsetOpnd->GetOffsetValue() != 0) { + MOperator mOp = is64Bits ? MOP_xaddrri12 : MOP_waddrri12; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, resultOpnd, *baseOpnd, *offsetOpnd)); + } else { + return baseOpnd; + } + } else { + CHECK_FATAL(mode == AArch64MemOperand::kAddrModeBOrX, "unexpect addressing mode."); + RegOperand *regOpnd = static_cast(&memOpnd)->GetOffsetRegister(); + MOperator mOp = is64Bits ? 
MOP_xaddrrr : MOP_waddrrr; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, resultOpnd, *baseOpnd, *regOpnd)); + } + return &resultOpnd; +} + +/* + * NOTE: I divided SelectDassign so that we can create "virtual" assignments + * when selecting other complex Maple IR instructions. For example, the atomic + * exchange and other intrinsics will need to assign its results to local + * variables. Such Maple IR instructions are pltform-specific (e.g. + * atomic_exchange can be implemented as one single machine intruction on x86_64 + * and ARMv8.1, but ARMv8.0 needs an LL/SC loop), therefore they cannot (in + * principle) be lowered at BELowerer or CGLowerer. + */ +void AArch64CGFunc::SelectDassign(StIdx stIdx, FieldID fieldId, PrimType rhsPType, Operand &opnd0) { + MIRSymbol *symbol = GetFunction().GetLocalOrGlobalSymbol(stIdx); + int32 offset = 0; + bool parmCopy = false; + if (fieldId != 0) { + MIRStructType *structType = static_cast(symbol->GetType()); + ASSERT(structType != nullptr, "SelectDassign: non-zero fieldID for non-structure"); + offset = GetBecommon().GetFieldOffset(*structType, fieldId).first; + parmCopy = IsParamStructCopy(*symbol); + } + uint32 regSize = GetPrimTypeBitSize(rhsPType); + MIRType *type = symbol->GetType(); + Operand &stOpnd = LoadIntoRegister(opnd0, IsPrimitiveInteger(rhsPType), regSize, + IsSignedInteger(type->GetPrimType())); + MOperator mOp = MOP_undef; + if ((type->GetKind() == kTypeStruct) || (type->GetKind() == kTypeUnion)) { + MIRStructType *structType = static_cast(type); + type = structType->GetFieldType(fieldId); + } else if (type->GetKind() == kTypeClass) { + MIRClassType *classType = static_cast(type); + type = classType->GetFieldType(fieldId); + } + + uint32 dataSize = GetPrimTypeBitSize(type->GetPrimType()); + if (type->GetPrimType() == PTY_agg) { + dataSize = GetPrimTypeBitSize(PTY_a64); + } + MemOperand *memOpnd = nullptr; + if (parmCopy) { + memOpnd = &LoadStructCopyBase(*symbol, offset, dataSize); + } else { + memOpnd = &GetOrCreateMemOpnd(*symbol, offset, dataSize); + } + AArch64MemOperand &archMemOperand = *static_cast(memOpnd); + if ((memOpnd->GetMemVaryType() == kNotVary) && IsImmediateOffsetOutOfRange(archMemOperand, dataSize)) { + memOpnd = &SplitOffsetWithAddInstruction(archMemOperand, dataSize); + } + + ASSERT(((type->GetKind() == kTypeScalar) || (type->GetKind() == kTypePointer) || + (type->GetKind() == kTypeStruct) || (type->GetKind() == kTypeArray)), "NYI dassign type"); + PrimType ptyp = type->GetPrimType(); + if (ptyp == PTY_agg) { + ptyp = PTY_a64; + } + + AArch64isa::MemoryOrdering memOrd = AArch64isa::kMoNone; + if (isVolStore) { + RegOperand *baseOpnd = ExtractNewMemBase(*memOpnd); + if (baseOpnd != nullptr) { + memOpnd = &CreateMemOpnd(*baseOpnd, 0, dataSize); + memOrd = AArch64isa::kMoRelease; + isVolStore = false; + } + } + if (memOrd == AArch64isa::kMoNone) { + mOp = PickStInsn(GetPrimTypeBitSize(ptyp), ptyp); + Insn &insn = GetCG()->BuildInstruction(mOp, stOpnd, *memOpnd); + + if (GetCG()->GenerateVerboseCG()) { + const MIRSymbol *symSecond = static_cast(memOpnd)->GetSymbol(); + if (symSecond != nullptr) { + std::string key; + MIRStorageClass sc = symSecond->GetStorageClass(); + if (sc == kScFormal) { + key = "param: "; + } else if (sc == kScAuto) { + key = "local var: "; + } else { + key = "global: "; + } + insn.SetComment(key.append(symSecond->GetName())); + } + } + + GetCurBB()->AppendInsn(insn); + } else { + AArch64CGFunc::SelectStoreRelease(*memOpnd, ptyp, stOpnd, ptyp, memOrd, true); + } +} + +void 
AArch64CGFunc::SelectAssertNull(UnaryStmtNode &stmt) { + Operand *opnd0 = HandleExpr(stmt, *stmt.Opnd(0)); + RegOperand &baseReg = LoadIntoRegister(*opnd0, PTY_a64); + auto &zwr = AArch64RegOperand::Get32bitZeroRegister(); + auto &mem = CreateMemOpnd(baseReg, 0, k32BitSize); + Insn &loadRef = GetCG()->BuildInstruction(MOP_wldr, zwr, mem); + loadRef.SetDoNotRemove(true); + if (GetCG()->GenerateVerboseCG()) { + loadRef.SetComment("null pointer check"); + } + GetCurBB()->AppendInsn(loadRef); +} + +void AArch64CGFunc::SelectRegassign(RegassignNode &stmt, Operand &opnd0) { + RegOperand *regOpnd = nullptr; + PregIdx pregIdx = stmt.GetRegIdx(); + if (IsSpecialPseudoRegister(pregIdx)) { + /* if it is one of special registers */ + ASSERT(-pregIdx != kSregRetval0, "the dest of RegAssign node must not be kSregRetval0"); + regOpnd = &GetOrCreateSpecialRegisterOperand(-pregIdx); + } else { + regOpnd = &GetOrCreateVirtualRegisterOperand(GetVirtualRegNOFromPseudoRegIdx(pregIdx)); + } + /* look at rhs */ + PrimType rhsType = stmt.Opnd(0)->GetPrimType(); + PrimType dtype = rhsType; + if (GetPrimTypeBitSize(dtype) < k32BitSize) { + ASSERT(IsPrimitiveInteger(dtype), ""); + dtype = IsSignedInteger(dtype) ? PTY_i32 : PTY_u32; + } + ASSERT(regOpnd != nullptr, "null ptr check!"); + SelectCopy(*regOpnd, dtype, opnd0, rhsType); + + if ((Globals::GetInstance()->GetOptimLevel() == 0) && (pregIdx >= 0)) { + MemOperand *dest = GetPseudoRegisterSpillMemoryOperand(pregIdx); + PrimType stype = GetTypeFromPseudoRegIdx(pregIdx); + MIRPreg *preg = GetFunction().GetPregTab()->PregFromPregIdx(pregIdx); + uint32 srcBitLength = GetPrimTypeSize(preg->GetPrimType()) * kBitsPerByte; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(PickStInsn(srcBitLength, stype), *regOpnd, *dest)); + } +} + +void AArch64CGFunc::SelectAggDassign(DassignNode &stmt) { + MIRSymbol *lhsSymbol = GetFunction().GetLocalOrGlobalSymbol(stmt.GetStIdx()); + int32 lhsOffset = 0; + MIRType *lhsType = lhsSymbol->GetType(); + if (stmt.GetFieldID() != 0) { + MIRStructType *structType = static_cast(lhsSymbol->GetType()); + ASSERT(structType != nullptr, "SelectAggDassign: non-zero fieldID for non-structure"); + lhsType = structType->GetFieldType(stmt.GetFieldID()); + lhsOffset = GetBecommon().GetFieldOffset(*structType, stmt.GetFieldID()).first; + } + uint32 lhsAlign = GetBecommon().GetTypeAlign(lhsType->GetTypeIndex()); + uint64 lhsSize = GetBecommon().GetTypeSize(lhsType->GetTypeIndex()); + + uint32 rhsAlign; + uint32 alignUsed; + int32 rhsOffset = 0; + if (stmt.GetRHS()->GetOpCode() == OP_dread) { + AddrofNode *rhsDread = static_cast(stmt.GetRHS()); + MIRSymbol *rhsSymbol = GetFunction().GetLocalOrGlobalSymbol(rhsDread->GetStIdx()); + MIRType *rhsType = rhsSymbol->GetType(); + if (rhsDread->GetFieldID() != 0) { + MIRStructType *structType = static_cast(rhsSymbol->GetType()); + ASSERT(structType != nullptr, "SelectAggDassign: non-zero fieldID for non-structure"); + rhsType = structType->GetFieldType(rhsDread->GetFieldID()); + rhsOffset = GetBecommon().GetFieldOffset(*structType, rhsDread->GetFieldID()).first; + } + rhsAlign = GetBecommon().GetTypeAlign(rhsType->GetTypeIndex()); + alignUsed = std::min(lhsAlign, rhsAlign); + ASSERT(alignUsed != 0, "expect non-zero"); + for (uint32 i = 0; i < (lhsSize / alignUsed); i++) { + /* generate the load */ + Operand &rhsMemOpnd = GetOrCreateMemOpnd(*rhsSymbol, rhsOffset + i * alignUsed, alignUsed * k8BitSize); + regno_t vRegNO = NewVReg(kRegTyInt, std::max(4u, alignUsed)); + RegOperand &result = 
CreateVirtualRegisterOperand(vRegNO); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(PickLdInsn(alignUsed * k8BitSize, PTY_u32), + result, rhsMemOpnd)); + /* generate the store */ + Operand &lhsMemOpnd = GetOrCreateMemOpnd(*lhsSymbol, lhsOffset + i * alignUsed, alignUsed * k8BitSize); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(PickStInsn(alignUsed * k8BitSize, PTY_u32), + result, lhsMemOpnd)); + } + /* take care of extra content at the end less than the unit of alignUsed */ + uint64 lhsSizeCovered = (lhsSize / alignUsed) * alignUsed; + uint32 newAlignUsed = alignUsed; + while (lhsSizeCovered < lhsSize) { + newAlignUsed = newAlignUsed >> 1; + CHECK_FATAL(newAlignUsed != 0, "expect non-zero"); + if ((lhsSizeCovered + newAlignUsed) > lhsSize) { + continue; + } + /* generate the load */ + Operand &rhsMemOpnd = GetOrCreateMemOpnd(*rhsSymbol, rhsOffset + lhsSizeCovered, newAlignUsed * k8BitSize); + regno_t vRegNO = NewVReg(kRegTyInt, std::max(4u, newAlignUsed)); + RegOperand &result = CreateVirtualRegisterOperand(vRegNO); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(PickLdInsn(newAlignUsed * k8BitSize, PTY_u32), + result, rhsMemOpnd)); + /* generate the store */ + Operand &lhsMemOpnd = GetOrCreateMemOpnd(*lhsSymbol, lhsOffset + lhsSizeCovered, newAlignUsed * k8BitSize); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(PickStInsn(newAlignUsed * k8BitSize, PTY_u32), + result, lhsMemOpnd)); + lhsSizeCovered += newAlignUsed; + } + } else if (stmt.GetRHS()->GetOpCode() == OP_iread) { + IreadNode *rhsIread = static_cast(stmt.GetRHS()); + RegOperand *addrOpnd = static_cast(HandleExpr(*rhsIread, *rhsIread->Opnd(0))); + addrOpnd = &LoadIntoRegister(*addrOpnd, rhsIread->Opnd(0)->GetPrimType()); + MIRPtrType *rhsPointerType = static_cast( + GlobalTables::GetTypeTable().GetTypeFromTyIdx(rhsIread->GetTyIdx())); + MIRType *rhsType = static_cast( + GlobalTables::GetTypeTable().GetTypeFromTyIdx(rhsPointerType->GetPointedTyIdx())); + bool isRefField = false; + if (rhsIread->GetFieldID() != 0) { + MIRStructType *rhsStructType = static_cast(rhsType); + ASSERT(rhsStructType != nullptr, "SelectAggDassign: non-zero fieldID for non-structure"); + rhsType = rhsStructType->GetFieldType(rhsIread->GetFieldID()); + rhsOffset = GetBecommon().GetFieldOffset(*rhsStructType, rhsIread->GetFieldID()).first; + isRefField = GetBecommon().IsRefField(*rhsStructType, rhsIread->GetFieldID()); + } + rhsAlign = GetBecommon().GetTypeAlign(rhsType->GetTypeIndex()); + alignUsed = std::min(lhsAlign, rhsAlign); + ASSERT(alignUsed != 0, "expect non-zero"); + for (uint32 i = 0; i < (lhsSize / alignUsed); i++) { + /* generate the load */ + AArch64OfstOperand &ofstOpnd = GetOrCreateOfstOpnd(rhsOffset + i * alignUsed, k32BitSize); + Operand &rhsMemOpnd = GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, alignUsed * k8BitSize, + addrOpnd, nullptr, &ofstOpnd, nullptr); + regno_t vRegNO = NewVReg(kRegTyInt, std::max(4u, alignUsed)); + RegOperand &result = CreateVirtualRegisterOperand(vRegNO); + Insn &insn = + GetCG()->BuildInstruction(PickLdInsn(alignUsed * k8BitSize, PTY_u32), result, rhsMemOpnd); + insn.MarkAsAccessRefField(isRefField); + GetCurBB()->AppendInsn(insn); + /* generate the store */ + Operand &lhsMemOpnd = GetOrCreateMemOpnd(*lhsSymbol, lhsOffset + i * alignUsed, alignUsed * k8BitSize); + GetCurBB()->AppendInsn( + GetCG()->BuildInstruction(PickStInsn(alignUsed * k8BitSize, PTY_u32), result, lhsMemOpnd)); + } + /* take care of extra content at the end less than the unit of alignUsed */ + uint64 lhsSizeCovered = 
(lhsSize / alignUsed) * alignUsed; + uint32 newAlignUsed = alignUsed; + while (lhsSizeCovered < lhsSize) { + newAlignUsed = newAlignUsed >> 1; + CHECK_FATAL(newAlignUsed != 0, "expect non-zero"); + if ((lhsSizeCovered + newAlignUsed) > lhsSize) { + continue; + } + /* generate the load */ + AArch64OfstOperand &ofstOpnd = GetOrCreateOfstOpnd(rhsOffset + lhsSizeCovered, k32BitSize); + Operand &rhsMemOpnd = GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, newAlignUsed * k8BitSize, + addrOpnd, nullptr, &ofstOpnd, nullptr); + regno_t vRegNO = NewVReg(kRegTyInt, std::max(4u, newAlignUsed)); + RegOperand &result = CreateVirtualRegisterOperand(vRegNO); + Insn &insn = + GetCG()->BuildInstruction(PickLdInsn(newAlignUsed * k8BitSize, PTY_u32), result, rhsMemOpnd); + insn.MarkAsAccessRefField(isRefField); + GetCurBB()->AppendInsn(insn); + /* generate the store */ + Operand &lhsMemOpnd = GetOrCreateMemOpnd(*lhsSymbol, lhsOffset + lhsSizeCovered, newAlignUsed * k8BitSize); + GetCurBB()->AppendInsn( + GetCG()->BuildInstruction(PickStInsn(newAlignUsed * k8BitSize, PTY_u32), result, lhsMemOpnd)); + lhsSizeCovered += newAlignUsed; + } + } else { + ASSERT(stmt.GetRHS()->op == OP_regread, "SelectAggDassign: NYI"); + bool isRet = false; + if (lhsType->GetKind() == kTypeStruct || lhsType->GetKind() == kTypeUnion) { + RegreadNode *rhsregread = static_cast(stmt.GetRHS()); + PregIdx pregIdx = rhsregread->GetRegIdx(); + if (IsSpecialPseudoRegister(pregIdx)) { + if ((-pregIdx) == kSregRetval0) { + CHECK_FATAL(lhsSize <= k16ByteSize, "SelectAggDassign: Incorrect agg size"); + RegOperand &parm1 = GetOrCreateSpecialRegisterOperand(pregIdx); + Operand &memopnd1 = GetOrCreateMemOpnd(*lhsSymbol, 0, k64BitSize); + MOperator mop1 = PickStInsn(k64BitSize, PTY_u64); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mop1, parm1, memopnd1)); + if (lhsSize > k8ByteSize) { + RegOperand &parm2 = GetOrCreatePhysicalRegisterOperand(R1, k64BitSize, kRegTyInt); + Operand &memopnd2 = GetOrCreateMemOpnd(*lhsSymbol, k8ByteSize, k64BitSize); + MOperator mop2 = PickStInsn(k64BitSize, PTY_u64); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mop2, parm2, memopnd2)); + } + isRet = true; + } + } + } + CHECK_FATAL(isRet, "SelectAggDassign: NYI"); + } +} + +static MIRType *GetPointedToType(MIRPtrType &pointerType) { + MIRType *aType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(pointerType.GetPointedTyIdx()); + if (aType->GetKind() == kTypeArray) { + MIRArrayType *arrayType = static_cast(aType); + return GlobalTables::GetTypeTable().GetTypeFromTyIdx(arrayType->GetElemTyIdx()); + } + if (aType->GetKind() == kTypeFArray || aType->GetKind() == kTypeJArray) { + MIRFarrayType *farrayType = static_cast(aType); + return GlobalTables::GetTypeTable().GetTypeFromTyIdx(farrayType->GetElemTyIdx()); + } + return aType; +} + +void AArch64CGFunc::SelectIassign(IassignNode &stmt) { + int32 offset = 0; + MIRPtrType *pointerType = static_cast(GlobalTables::GetTypeTable().GetTypeFromTyIdx(stmt.GetTyIdx())); + ASSERT(pointerType != nullptr, "expect a pointer type at iassign node"); + MIRType *pointedType = nullptr; + bool isRefField = false; + AArch64isa::MemoryOrdering memOrd = AArch64isa::kMoNone; + + if (stmt.GetFieldID() != 0) { + MIRType *pointedTy = GlobalTables::GetTypeTable().GetTypeFromTyIdx(pointerType->GetPointedTyIdx()); + MIRStructType *structType = nullptr; + if (pointedTy->GetKind() != kTypeJArray) { + structType = static_cast(pointedTy); + } else { + /* it's a Jarray type. 
using it's parent's field info: java.lang.Object */ + structType = static_cast(pointedTy)->GetParentType(); + } + ASSERT(structType != nullptr, "SelectIassign: non-zero fieldID for non-structure"); + pointedType = structType->GetFieldType(stmt.GetFieldID()); + offset = GetBecommon().GetFieldOffset(*structType, stmt.GetFieldID()).first; + isRefField = GetBecommon().IsRefField(*structType, stmt.GetFieldID()); + } else { + pointedType = GetPointedToType(*pointerType); + if (GetFunction().IsJava() && (pointedType->GetKind() == kTypePointer)) { + MIRType *nextPointedType = + GlobalTables::GetTypeTable().GetTypeFromTyIdx(static_cast(pointedType)->GetPointedTyIdx()); + if (nextPointedType->GetKind() != kTypeScalar) { + isRefField = true; /* write into an object array or a high-dimensional array */ + } + } + if (pointedType->GetPrimType() == PTY_agg) { + maple::LogInfo::MapleLogger(kLlErr) << "Error: cannot find field in " << + GlobalTables::GetStrTable().GetStringFromStrIdx(pointedType->GetNameStrIdx()) << '\n'; + exit(-1); + } + } + + PrimType styp = stmt.GetRHS()->GetPrimType(); + Operand *valOpnd = HandleExpr(stmt, *stmt.GetRHS()); + Operand &srcOpnd = LoadIntoRegister(*valOpnd, IsPrimitiveInteger(styp), GetPrimTypeBitSize(styp)); + + PrimType destType = pointedType->GetPrimType(); + if (destType == PTY_agg) { + destType = PTY_a64; + } + ASSERT(stmt.Opnd(0) != nullptr, "null ptr check"); + MemOperand &memOpnd = CreateMemOpnd(destType, stmt, *stmt.Opnd(0), offset); + if (isVolStore && static_cast(memOpnd).GetAddrMode() == AArch64MemOperand::kAddrModeBOi) { + memOrd = AArch64isa::kMoRelease; + isVolStore = false; + } + + if (memOrd == AArch64isa::kMoNone) { + SelectCopy(memOpnd, destType, srcOpnd, destType); + } else { + AArch64CGFunc::SelectStoreRelease(memOpnd, destType, srcOpnd, destType, memOrd, false); + } + GetCurBB()->GetLastInsn()->MarkAsAccessRefField(isRefField); +} + +void AArch64CGFunc::SelectAggIassign(IassignNode &stmt, Operand &AddrOpnd) { + ASSERT(stmt.Opnd(0) != nullptr, "null ptr check"); + Operand &lhsAddrOpnd = LoadIntoRegister(AddrOpnd, stmt.Opnd(0)->GetPrimType()); + int32 lhsOffset = 0; + MIRType *stmtType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(stmt.GetTyIdx()); + MIRSymbol *addrSym = nullptr; + MIRPtrType *lhsPointerType = nullptr; + if (stmtType->GetPrimType() == PTY_agg) { + /* Move into regs */ + AddrofNode &addrofnode = static_cast(stmt.GetAddrExprBase()); + addrSym = mirModule.CurFunction()->GetLocalOrGlobalSymbol(addrofnode.GetStIdx()); + MIRType *addrty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(addrSym->GetTyIdx()); + lhsPointerType = static_cast(GlobalTables::GetTypeTable().GetTypeFromTyIdx(addrty->GetTypeIndex())); + } else { + lhsPointerType = static_cast(stmtType); + } + MIRType *lhsType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(lhsPointerType->GetPointedTyIdx()); + if (stmt.GetFieldID() != 0) { + MIRStructType *structType = static_cast(lhsType); + ASSERT(structType != nullptr, "SelectAggIassign: non-zero fieldID for non-structure"); + lhsType = structType->GetFieldType(stmt.GetFieldID()); + lhsOffset = GetBecommon().GetFieldOffset(*structType, stmt.GetFieldID()).first; + } else if (lhsType->GetKind() == kTypeArray) { +#if DEBUG + MIRArrayType *arrayLhsType = static_cast(lhsType); + /* access an array element */ + MIRType *lhsType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(arrayLhsType->GetElemTyIdx()); + MIRTypeKind typeKind = lhsType->GetKind(); + ASSERT(((typeKind == kTypeScalar) || (typeKind == kTypeStruct) || (typeKind == 
kTypeClass) || + (typeKind == kTypePointer)), + "unexpected array element type in iassign"); +#endif + } else if (lhsType->GetKind() == kTypeFArray) { +#if DEBUG + MIRFarrayType *farrayLhsType = static_cast(lhsType); + /* access an array element */ + MIRType *lhsElemType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(farrayLhsType->GetElemTyIdx()); + MIRTypeKind typeKind = lhsElemType->GetKind(); + ASSERT(((typeKind == kTypeScalar) || (typeKind == kTypeStruct) || (typeKind == kTypeClass) || + (typeKind == kTypePointer)), + "unexpected array element type in iassign"); +#endif + } + uint32 lhsAlign = GetBecommon().GetTypeAlign(lhsType->GetTypeIndex()); + uint64 lhsSize = GetBecommon().GetTypeSize(lhsType->GetTypeIndex()); + + uint32 rhsAlign; + uint32 alignUsed; + int32 rhsOffset = 0; + if (stmt.GetRHS()->GetOpCode() == OP_dread) { + AddrofNode *rhsDread = static_cast(stmt.GetRHS()); + MIRSymbol *rhsSymbol = GetFunction().GetLocalOrGlobalSymbol(rhsDread->GetStIdx()); + MIRType *rhsType = rhsSymbol->GetType(); + if (rhsDread->GetFieldID() != 0) { + MIRStructType *structType = static_cast(rhsSymbol->GetType()); + ASSERT(structType != nullptr, "SelectAggIassign: non-zero fieldID for non-structure"); + rhsType = structType->GetFieldType(rhsDread->GetFieldID()); + rhsOffset = GetBecommon().GetFieldOffset(*structType, rhsDread->GetFieldID()).first; + } + if (stmtType->GetPrimType() == PTY_agg) { + /* generate move to regs. */ + CHECK_FATAL(lhsSize <= k16ByteSize, "SelectAggIassign: illegal struct size"); + /* aggregates are 8 byte aligned. */ + Operand *rhsmemopnd = nullptr; + RegOperand *result[kTwoRegister]; /* maximum 16 bytes, 2 registers */ + bool parmCopy = IsParamStructCopy(*rhsSymbol); + uint32 loadSize = (lhsSize <= k4ByteSize) ? k4ByteSize : k8ByteSize; + uint32 numRegs = (lhsSize <= k8ByteSize) ? kOneRegister : kTwoRegister; + for (uint32 i = 0; i < numRegs; i++) { + if (parmCopy) { + rhsmemopnd = &LoadStructCopyBase(*rhsSymbol, rhsOffset + i * k8ByteSize, loadSize * kBitsPerByte); + } else { + rhsmemopnd = &GetOrCreateMemOpnd(*rhsSymbol, rhsOffset + i * k8ByteSize, loadSize * kBitsPerByte); + } + result[i] = &CreateVirtualRegisterOperand(NewVReg(kRegTyInt, loadSize)); + MOperator mop1 = PickLdInsn(loadSize * kBitsPerByte, PTY_u32); + Insn &ld = GetCG()->BuildInstruction(mop1, *(result[i]), *rhsmemopnd); + GetCurBB()->AppendInsn(ld); + } + for (uint32 i = 0; i < numRegs; i++) { + AArch64reg preg = (i == 0 ? R0 : R1); + RegOperand &dest = GetOrCreatePhysicalRegisterOperand(preg, loadSize * kBitsPerByte, kRegTyInt); + MOperator mop2 = (loadSize == k4ByteSize) ? MOP_wmovrr : MOP_xmovrr; + Insn &mov = GetCG()->BuildInstruction(mop2, dest, *(result[i])); + GetCurBB()->AppendInsn(mov); + } + /* Create artificial dependency to extend the live range */ + for (uint32 i = 0; i < numRegs; i++) { + AArch64reg preg = (i == 0 ? 
R0 : R1); + RegOperand &dest = GetOrCreatePhysicalRegisterOperand(preg, loadSize * kBitsPerByte, kRegTyInt); + Insn &pseudo = cg->BuildInstruction(MOP_pseudo_ret_int, dest); + GetCurBB()->AppendInsn(pseudo); + } + return; + } + rhsAlign = GetBecommon().GetTypeAlign(rhsType->GetTypeIndex()); + alignUsed = std::min(lhsAlign, rhsAlign); + ASSERT(alignUsed != 0, "expect non-zero"); + bool parmCopy = IsParamStructCopy(*rhsSymbol); + for (uint32 i = 0; i < (lhsSize / alignUsed); ++i) { + /* generate the load */ + Operand *rhsMemOpnd = nullptr; + if (parmCopy) { + rhsMemOpnd = &LoadStructCopyBase(*rhsSymbol, rhsOffset + i * alignUsed, alignUsed * k8BitSize); + } else { + rhsMemOpnd = &GetOrCreateMemOpnd(*rhsSymbol, rhsOffset + i * alignUsed, alignUsed * k8BitSize); + } + regno_t vRegNO = NewVReg(kRegTyInt, std::max(4u, alignUsed)); + RegOperand &result = CreateVirtualRegisterOperand(vRegNO); + GetCurBB()->AppendInsn( + GetCG()->BuildInstruction(PickLdInsn(alignUsed * k8BitSize, PTY_u32), result, *rhsMemOpnd)); + /* generate the store */ + AArch64OfstOperand &ofstOpnd = GetOrCreateOfstOpnd(lhsOffset + i * alignUsed, k32BitSize); + Operand &lhsMemOpnd = GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, alignUsed * k8BitSize, + static_cast(&lhsAddrOpnd), nullptr, &ofstOpnd, nullptr); + GetCurBB()->AppendInsn( + GetCG()->BuildInstruction(PickStInsn(alignUsed * k8BitSize, PTY_u32), result, lhsMemOpnd)); + } + /* take care of extra content at the end less than the unit of alignUsed */ + uint64 lhsSizeCovered = (lhsSize / alignUsed) * alignUsed; + uint32 newAlignUsed = alignUsed; + while (lhsSizeCovered < lhsSize) { + newAlignUsed = newAlignUsed >> 1; + CHECK_FATAL(newAlignUsed != 0, "expect non-zero"); + if ((lhsSizeCovered + newAlignUsed) > lhsSize) { + continue; + } + /* generate the load */ + Operand &rhsMemOpnd = GetOrCreateMemOpnd(*rhsSymbol, rhsOffset + lhsSizeCovered, newAlignUsed * k8BitSize); + regno_t vRegNO = NewVReg(kRegTyInt, std::max(4u, newAlignUsed)); + Operand &result = CreateVirtualRegisterOperand(vRegNO); + GetCurBB()->AppendInsn( + GetCG()->BuildInstruction(PickLdInsn(newAlignUsed * k8BitSize, PTY_u32), result, rhsMemOpnd)); + /* generate the store */ + AArch64OfstOperand &ofstOpnd = GetOrCreateOfstOpnd(lhsOffset + lhsSizeCovered, k32BitSize); + Operand &lhsMemOpnd = GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, newAlignUsed * k8BitSize, + static_cast(&lhsAddrOpnd), nullptr, &ofstOpnd, static_cast(nullptr)); + GetCurBB()->AppendInsn( + GetCG()->BuildInstruction(PickStInsn(newAlignUsed * k8BitSize, PTY_u32), result, lhsMemOpnd)); + lhsSizeCovered += newAlignUsed; + } + } else { /* rhs is iread */ + ASSERT(stmt.GetRHS()->GetOpCode() == OP_iread, "SelectAggDassign: NYI"); + IreadNode *rhsIread = static_cast(stmt.GetRHS()); + RegOperand *rhsAddrOpnd = static_cast(HandleExpr(*rhsIread, *rhsIread->Opnd(0))); + rhsAddrOpnd = &LoadIntoRegister(*rhsAddrOpnd, rhsIread->Opnd(0)->GetPrimType()); + MIRPtrType *rhsPointerType = + static_cast(GlobalTables::GetTypeTable().GetTypeFromTyIdx(rhsIread->GetTyIdx())); + MIRType *rhsType = static_cast( + GlobalTables::GetTypeTable().GetTypeFromTyIdx(rhsPointerType->GetPointedTyIdx())); + bool isRefField = false; + if (rhsIread->GetFieldID() != 0) { + MIRStructType *rhsStructType = static_cast(rhsType); + ASSERT(rhsStructType, "SelectAggDassign: non-zero fieldID for non-structure"); + rhsType = rhsStructType->GetFieldType(rhsIread->GetFieldID()); + rhsOffset = GetBecommon().GetFieldOffset(*rhsStructType, rhsIread->GetFieldID()).first; + isRefField 
= GetBecommon().IsRefField(*rhsStructType, rhsIread->GetFieldID()); + } + if (stmtType->GetPrimType() == PTY_agg) { + /* generate move to regs. */ + CHECK_FATAL(lhsSize <= k16ByteSize, "SelectAggIassign: illegal struct size"); + RegOperand *result[kTwoRegister]; /* maximum 16 bytes, 2 registers */ + uint32 loadSize = (lhsSize <= k4ByteSize) ? k4ByteSize : k8ByteSize; + uint32 numRegs = (lhsSize <= k8ByteSize) ? kOneRegister : kTwoRegister; + for (uint32 i = 0; i < numRegs; i++) { + AArch64OfstOperand *rhsOffOpnd = &GetOrCreateOfstOpnd(rhsOffset + i * loadSize, loadSize * kBitsPerByte); + Operand &rhsmemopnd = + GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, loadSize, rhsAddrOpnd, nullptr, rhsOffOpnd, nullptr); + result[i] = &CreateVirtualRegisterOperand(NewVReg(kRegTyInt, loadSize)); + MOperator mop1 = PickLdInsn(loadSize * kBitsPerByte, PTY_u32); + Insn &ld = GetCG()->BuildInstruction(mop1, *(result[i]), rhsmemopnd); + ld.MarkAsAccessRefField(isRefField); + GetCurBB()->AppendInsn(ld); + } + for (uint32 i = 0; i < numRegs; i++) { + AArch64reg preg = (i == 0 ? R0 : R1); + RegOperand &dest = GetOrCreatePhysicalRegisterOperand(preg, loadSize * kBitsPerByte, kRegTyInt); + Insn &mov = GetCG()->BuildInstruction(MOP_xmovrr, dest, *(result[i])); + GetCurBB()->AppendInsn(mov); + } + /* Create artificial dependency to extend the live range */ + for (uint32 i = 0; i < numRegs; i++) { + AArch64reg preg = (i == 0 ? R0 : R1); + RegOperand &dest = GetOrCreatePhysicalRegisterOperand(preg, loadSize * kBitsPerByte, kRegTyInt); + Insn &pseudo = cg->BuildInstruction(MOP_pseudo_ret_int, dest); + GetCurBB()->AppendInsn(pseudo); + } + return; + } + rhsAlign = GetBecommon().GetTypeAlign(rhsType->GetTypeIndex()); + alignUsed = std::min(lhsAlign, rhsAlign); + ASSERT(alignUsed != 0, "expect non-zero"); + for (uint32 i = 0; i < (lhsSize / alignUsed); i++) { + /* generate the load */ + AArch64OfstOperand &rhsOfstOpnd = GetOrCreateOfstOpnd(rhsOffset + i * alignUsed, k32BitSize); + Operand &rhsMemOpnd = GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, alignUsed * k8BitSize, + static_cast(rhsAddrOpnd), nullptr, &rhsOfstOpnd, nullptr); + regno_t vRegNO = NewVReg(kRegTyInt, std::max(4u, alignUsed)); + RegOperand &result = CreateVirtualRegisterOperand(vRegNO); + Insn &insn = + GetCG()->BuildInstruction(PickLdInsn(alignUsed * k8BitSize, PTY_u32), result, rhsMemOpnd); + insn.MarkAsAccessRefField(isRefField); + GetCurBB()->AppendInsn(insn); + /* generate the store */ + AArch64OfstOperand &lhsOfstOpnd = GetOrCreateOfstOpnd(lhsOffset + i * alignUsed, k32BitSize); + Operand &lhsMemOpnd = GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, alignUsed * k8BitSize, + static_cast(&lhsAddrOpnd), nullptr, &lhsOfstOpnd, nullptr); + GetCurBB()->AppendInsn( + GetCG()->BuildInstruction(PickStInsn(alignUsed * k8BitSize, PTY_u32), result, lhsMemOpnd)); + } + /* take care of extra content at the end less than the unit of alignUsed */ + uint64 lhsSizeCovered = (lhsSize / alignUsed) * alignUsed; + uint32 newAlignUsed = alignUsed; + while (lhsSizeCovered < lhsSize) { + newAlignUsed = newAlignUsed >> 1; + CHECK_FATAL(newAlignUsed != 0, "expect non-zero"); + if ((lhsSizeCovered + newAlignUsed) > lhsSize) { + continue; + } + /* generate the load */ + AArch64OfstOperand &rhsOfstOpnd = GetOrCreateOfstOpnd(rhsOffset + lhsSizeCovered, k32BitSize); + Operand &rhsMemOpnd = GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, newAlignUsed * k8BitSize, + static_cast(rhsAddrOpnd), nullptr, &rhsOfstOpnd, nullptr); + regno_t vRegNO = NewVReg(kRegTyInt, 
std::max(4u, newAlignUsed)); + RegOperand &result = CreateVirtualRegisterOperand(vRegNO); + Insn &insn = + GetCG()->BuildInstruction(PickLdInsn(newAlignUsed * k8BitSize, PTY_u32), result, rhsMemOpnd); + insn.MarkAsAccessRefField(isRefField); + GetCurBB()->AppendInsn(insn); + /* generate the store */ + AArch64OfstOperand &lhsOfstOpnd = GetOrCreateOfstOpnd(lhsOffset + lhsSizeCovered, k32BitSize); + Operand &lhsMemOpnd = GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, newAlignUsed * k8BitSize, + static_cast(&lhsAddrOpnd), nullptr, &lhsOfstOpnd, nullptr); + GetCurBB()->AppendInsn( + GetCG()->BuildInstruction(PickStInsn(newAlignUsed * k8BitSize, PTY_u32), result, lhsMemOpnd)); + lhsSizeCovered += newAlignUsed; + } + } +} + +Operand *AArch64CGFunc::SelectDread(const BaseNode &parent, DreadNode &expr) { + MIRSymbol *symbol = GetFunction().GetLocalOrGlobalSymbol(expr.GetStIdx()); + if (symbol->IsEhIndex()) { + MIRType *type = GlobalTables::GetTypeTable().GetTypeFromTyIdx((TyIdx)PTY_i32); + /* use the second register return by __builtin_eh_return(). */ + ReturnMechanism retMech(*type, GetBecommon()); + retMech.SetupSecondRetReg(*type); + return &GetOrCreatePhysicalRegisterOperand(retMech.GetReg1(), k64BitSize, kRegTyInt); + } + + PrimType symType = symbol->GetType()->GetPrimType(); + int32 offset = 0; + bool parmCopy = false; + if (expr.GetFieldID() != 0) { + MIRStructType *structType = static_cast(symbol->GetType()); + ASSERT(structType != nullptr, "SelectDread: non-zero fieldID for non-structure"); + symType = structType->GetFieldType(expr.GetFieldID())->GetPrimType(); + offset = GetBecommon().GetFieldOffset(*structType, expr.GetFieldID()).first; + parmCopy = IsParamStructCopy(*symbol); + } + CHECK_FATAL(symType != PTY_agg, "dread type error"); + uint32 dataSize = GetPrimTypeBitSize(symType); + uint32 aggSize = 0; + if (symType == PTY_agg) { + if (expr.GetPrimType() == PTY_agg) { + aggSize = GetBecommon().GetTypeSize(symbol->GetType()->GetTypeIndex().GetIdx()); + dataSize = k64BitSize; + } else { + dataSize = GetPrimTypeBitSize(expr.GetPrimType()); + } + } + MemOperand *memOpnd = nullptr; + if (aggSize > k8ByteSize) { + if (parent.op == OP_eval) { + if (symbol->GetAttr(ATTR_volatile)) { + /* Need to generate loads for the upper parts of the struct. */ + Operand &dest = AArch64RegOperand::GetZeroRegister(k64BitSize); + uint32 numLoads = RoundUp(aggSize, k64BitSize) / k64BitSize; + for (uint32 o = 0; o < numLoads; ++o) { + if (parmCopy) { + memOpnd = &LoadStructCopyBase(*symbol, offset + o * kSizeOfPtr, kSizeOfPtr); + } else { + memOpnd = &GetOrCreateMemOpnd(*symbol, offset + o * kSizeOfPtr, kSizeOfPtr); + } + if (IsImmediateOffsetOutOfRange(*static_cast(memOpnd), kSizeOfPtr)) { + memOpnd = &SplitOffsetWithAddInstruction(*static_cast(memOpnd), kSizeOfPtr); + } + SelectCopy(dest, PTY_u64, *memOpnd, PTY_u64); + } + } else { + /* No side-effects. No need to generate anything for eval. 
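+ * (For comparison, the volatile branch above keeps the access alive by
+ * loading each 8-byte slice of the aggregate into the zero register;
+ * for a non-volatile symbol the whole read can be dropped.)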
*/ + } + } else { + CHECK_FATAL(0, "SelectDread: Illegal agg size"); + } + } + if (parmCopy) { + memOpnd = &LoadStructCopyBase(*symbol, offset, dataSize); + } else { + memOpnd = &GetOrCreateMemOpnd(*symbol, offset, dataSize); + } + if ((memOpnd->GetMemVaryType() == kNotVary) && + IsImmediateOffsetOutOfRange(*static_cast(memOpnd), dataSize)) { + return &SplitOffsetWithAddInstruction(*static_cast(memOpnd), dataSize); + } + return memOpnd; +} + +RegOperand *AArch64CGFunc::SelectRegread(RegreadNode &expr) { + PregIdx pregIdx = expr.GetRegIdx(); + if (IsSpecialPseudoRegister(pregIdx)) { + /* if it is one of special registers */ + return &GetOrCreateSpecialRegisterOperand(-pregIdx, expr.GetPrimType()); + } + RegOperand ® = GetOrCreateVirtualRegisterOperand(GetVirtualRegNOFromPseudoRegIdx(pregIdx)); + if (Globals::GetInstance()->GetOptimLevel() == 0) { + MemOperand *src = GetPseudoRegisterSpillMemoryOperand(pregIdx); + PrimType stype = GetTypeFromPseudoRegIdx(pregIdx); + MIRPreg *preg = GetFunction().GetPregTab()->PregFromPregIdx(pregIdx); + uint32 srcBitLength = GetPrimTypeSize(preg->GetPrimType()) * kBitsPerByte; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(PickLdInsn(srcBitLength, stype), reg, *src)); + } + return ® +} + +void AArch64CGFunc::SelectAddrof(Operand &result, StImmOperand &stImm) { + const MIRSymbol *symbol = stImm.GetSymbol(); + if ((symbol->GetStorageClass() == kScAuto) || (symbol->GetStorageClass() == kScFormal)) { + if (!GetCG()->IsQuiet()) { + maple::LogInfo::MapleLogger(kLlErr) << + "Warning: we expect AddrOf with StImmOperand is not used for local variables"; + } + AArch64SymbolAlloc *symLoc = + static_cast(GetMemlayout()->GetSymAllocInfo(symbol->GetStIndex())); + AArch64ImmOperand *offset = nullptr; + if (symLoc->GetMemSegment()->GetMemSegmentKind() == kMsArgsStkPassed) { + offset = &CreateImmOperand(GetBaseOffset(*symLoc) + stImm.GetOffset(), k64BitSize, false, kUnAdjustVary); + } else if (symLoc->GetMemSegment()->GetMemSegmentKind() == kMsRefLocals) { + auto it = immOpndsRequiringOffsetAdjustmentForRefloc.find(symLoc); + if (it != immOpndsRequiringOffsetAdjustmentForRefloc.end()) { + offset = (*it).second; + } else { + offset = &CreateImmOperand(GetBaseOffset(*symLoc) + stImm.GetOffset(), k64BitSize, false); + immOpndsRequiringOffsetAdjustmentForRefloc[symLoc] = offset; + } + } else if (mirModule.IsJavaModule()) { + auto it = immOpndsRequiringOffsetAdjustment.find(symLoc); + if ((it != immOpndsRequiringOffsetAdjustment.end()) && (symbol->GetType()->GetPrimType() != PTY_agg)) { + offset = (*it).second; + } else { + offset = &CreateImmOperand(GetBaseOffset(*symLoc) + stImm.GetOffset(), k64BitSize, false); + if (symbol->GetType()->GetKind() != kTypeClass) { + immOpndsRequiringOffsetAdjustment[symLoc] = offset; + } + } + } else { + /* Do not cache modified symbol location */ + offset = &CreateImmOperand(GetBaseOffset(*symLoc) + stImm.GetOffset(), k64BitSize, false); + } + + SelectAdd(result, *GetBaseReg(*symLoc), *offset, PTY_u64); + if (GetCG()->GenerateVerboseCG()) { + /* Add a comment */ + Insn *insn = GetCurBB()->GetLastInsn(); + std::string comm = "local/formal var: "; + comm.append(symbol->GetName()); + insn->SetComment(comm); + } + } else { + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xadrp, result, stImm)); + if (CGOptions::IsPIC() && ((symbol->GetStorageClass() == kScGlobal) || (symbol->GetStorageClass() == kScExtern))) { + /* ldr x0, [x0, #:got_lo12:Ljava_2Flang_2FSystem_3B_7Cout] */ + AArch64OfstOperand &offset = CreateOfstOpnd(*stImm.GetSymbol(), 
stImm.GetOffset(), stImm.GetRelocs()); + AArch64MemOperand &memOpnd = GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, kSizeOfPtr * kBitsPerByte, + static_cast(&result), nullptr, &offset, nullptr); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xldr, result, memOpnd)); + } else { + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xadrpl12, result, result, stImm)); + } + } +} + +void AArch64CGFunc::SelectAddrof(Operand &result, AArch64MemOperand &memOpnd) { + const MIRSymbol *symbol = memOpnd.GetSymbol(); + if (symbol->GetStorageClass() == kScAuto) { + auto *offsetOpnd = static_cast(memOpnd.GetOffsetImmediate()); + Operand &immOpnd = CreateImmOperand(offsetOpnd->GetOffsetValue(), PTY_u32, false); + ASSERT(memOpnd.GetBaseRegister() != nullptr, "nullptr check"); + SelectAdd(result, *memOpnd.GetBaseRegister(), immOpnd, PTY_u32); + } else { + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xadrp, result, memOpnd)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xadrpl12, result, result, memOpnd)); + } +} + +Operand *AArch64CGFunc::SelectAddrof(AddrofNode &expr) { + MIRSymbol *symbol = GetFunction().GetLocalOrGlobalSymbol(expr.GetStIdx()); + int32 offset = 0; + if (expr.GetFieldID() != 0) { + MIRStructType *structType = static_cast(symbol->GetType()); + /* with array of structs, it is possible to have nullptr */ + if (structType != nullptr) { + offset = GetBecommon().GetFieldOffset(*structType, expr.GetFieldID()).first; + } + } + if ((symbol->GetStorageClass() == kScFormal) && (symbol->GetSKind() == kStVar) && + ((expr.GetFieldID() != 0) || + (GetBecommon().GetTypeSize(symbol->GetType()->GetTypeIndex().GetIdx()) > k16ByteSize))) { + /* + * Struct param is copied on the stack by caller if struct size > 16. + * Else if size < 16 then struct param is copied into one or two registers. + */ + RegOperand *stackAddr = &CreateVirtualRegisterOperand(NewVReg(kRegTyInt, k8ByteSize)); + /* load the base address of the struct copy from stack. 
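+ * For a struct larger than 16 bytes the caller passes a reference, so this
+ * slot holds only a pointer and the extra ldr below fetches the actual
+ * struct address; for 16 bytes or less the slot address itself is returned
+ * (and isAggParamInReg is flagged for the later expression selection).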
*/ + SelectAddrof(*stackAddr, CreateStImmOperand(*symbol, 0, 0)); + Operand *structAddr; + if (GetBecommon().GetTypeSize(symbol->GetType()->GetTypeIndex().GetIdx()) <= k16ByteSize) { + isAggParamInReg = true; + structAddr = stackAddr; + } else { + AArch64OfstOperand *offopnd = &CreateOfstOpnd(0, k32BitSize); + AArch64MemOperand *mo = &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, kSizeOfPtr * kBitsPerByte, + stackAddr, nullptr, offopnd, nullptr); + structAddr = &CreateVirtualRegisterOperand(NewVReg(kRegTyInt, k8ByteSize)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xldr, *structAddr, *mo)); + } + if (offset == 0) { + return structAddr; + } else { + /* add the struct offset to the base address */ + Operand *result = &CreateVirtualRegisterOperand(NewVReg(kRegTyInt, k8ByteSize)); + ImmOperand *imm = &CreateImmOperand(PTY_a64, offset); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xaddrri12, *result, *structAddr, *imm)); + return result; + } + } + PrimType ptype = expr.GetPrimType(); + regno_t vRegNO = NewVReg(kRegTyInt, GetPrimTypeSize(ptype)); + Operand &result = CreateVirtualRegisterOperand(vRegNO); + if (symbol->IsReflectionClassInfo() && !symbol->IsReflectionArrayClassInfo() && !GetCG()->IsLibcore()) { + /* + * Turn addrof __cinf_X into a load of _PTR__cinf_X + * adrp x1, _PTR__cinf_Ljava_2Flang_2FSystem_3B + * ldr x1, [x1, #:lo12:_PTR__cinf_Ljava_2Flang_2FSystem_3B] + */ + std::string ptrName = namemangler::kPtrPrefixStr + symbol->GetName(); + MIRType *ptrType = GlobalTables::GetTypeTable().GetPtr(); + symbol = GetMirModule().GetMIRBuilder()->GetOrCreateGlobalDecl(ptrName, *ptrType); + symbol->SetStorageClass(kScFstatic); + + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_adrp_ldr, result, + CreateStImmOperand(*symbol, 0, 0))); + return &result; + } + + SelectAddrof(result, CreateStImmOperand(*symbol, offset, 0)); + return &result; +} + +Operand &AArch64CGFunc::SelectAddrofFunc(AddroffuncNode &expr) { + uint32 instrSize = static_cast(expr.SizeOfInstr()); + regno_t vRegNO = NewVReg(kRegTyInt, instrSize); + Operand &operand = CreateVirtualRegisterOperand(vRegNO); + MIRFunction *mirFunction = GlobalTables::GetFunctionTable().GetFunctionFromPuidx(expr.GetPUIdx()); + SelectAddrof(operand, CreateStImmOperand(*mirFunction->GetFuncSymbol(), 0, 0)); + return operand; +} + +/* For an entire aggregate that can fit inside a single 8 byte register. 
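+ * The bit size maps directly to an unsigned integer type, e.g.
+ * GetDestTypeFromAggSize(k32BitSize) == PTY_u32 for a 4-byte struct;
+ * any other size trips the CHECK_FATAL below.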
*/ +PrimType AArch64CGFunc::GetDestTypeFromAggSize(uint32 bitSize) const { + PrimType primType; + switch (bitSize) { + case k8BitSize: { + primType = PTY_u8; + break; + } + case k16BitSize: { + primType = PTY_u16; + break; + } + case k32BitSize: { + primType = PTY_u32; + break; + } + case k64BitSize: { + primType = PTY_u64; + break; + } + default: + CHECK_FATAL(false, "aggregate of unhandled size"); + } + return primType; +} + +Operand &AArch64CGFunc::SelectAddrofLabel(AddroflabelNode &expr) { + /* adrp reg, label-id */ + Operand &dst = CreateVirtualRegisterOperand(NewVReg(kRegTyInt, expr.SizeOfInstr())); + Operand &immOpnd = CreateImmOperand(expr.GetOffset(), k64BitSize, false); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_adrp_label, dst, immOpnd)); + return dst; +} + +Operand *AArch64CGFunc::SelectIread(const BaseNode &parent, IreadNode &expr) { + int32 offset = 0; + MIRType *type = GlobalTables::GetTypeTable().GetTypeFromTyIdx(expr.GetTyIdx()); + MIRPtrType *pointerType = static_cast(type); + ASSERT(pointerType != nullptr, "expect a pointer type at iread node"); + MIRType *pointedType = nullptr; + bool isRefField = false; + AArch64isa::MemoryOrdering memOrd = AArch64isa::kMoNone; + + if (expr.GetFieldID() != 0) { + MIRType *pointedTy = GlobalTables::GetTypeTable().GetTypeFromTyIdx(pointerType->GetPointedTyIdx()); + MIRStructType *structType = nullptr; + if (pointedTy->GetKind() != kTypeJArray) { + structType = static_cast(pointedTy); + } else { + /* it's a Jarray type. using it's parent's field info: java.lang.Object */ + structType = static_cast(pointedTy)->GetParentType(); + } + + ASSERT(structType != nullptr, "SelectIread: non-zero fieldID for non-structure"); + pointedType = structType->GetFieldType(expr.GetFieldID()); + offset = GetBecommon().GetFieldOffset(*structType, expr.GetFieldID()).first; + isRefField = GetBecommon().IsRefField(*structType, expr.GetFieldID()); + } else { + pointedType = GetPointedToType(*pointerType); + if (GetFunction().IsJava() && (pointedType->GetKind() == kTypePointer)) { + MIRType *nextPointedType = + GlobalTables::GetTypeTable().GetTypeFromTyIdx(static_cast(pointedType)->GetPointedTyIdx()); + if (nextPointedType->GetKind() != kTypeScalar) { + isRefField = true; /* read from an object array, or an high-dimentional array */ + } + } + if (pointedType->GetPrimType() == PTY_agg) { + maple::LogInfo::MapleLogger(kLlErr) << "Error: cannot find field in " << + GlobalTables::GetStrTable().GetStringFromStrIdx(pointedType->GetNameStrIdx()) << '\n'; + CHECK_FATAL(false, "cannot find field"); + return nullptr; + } + } + + RegType regType = GetRegTyFromPrimTy(expr.GetPrimType()); + uint32 regSize = GetPrimTypeSize(expr.GetPrimType()); + if (expr.GetFieldID() == 0 && pointedType->GetPrimType() == PTY_agg) { + /* Maple IR can passing small struct to be loaded into a single register. */ + if (regType == kRegTyFloat) { + /* regsize is correct */ + } else { + uint32 sz = GetBecommon().GetTypeSize(pointedType->GetTypeIndex().GetIdx()); + regSize = (sz <= k4ByteSize) ? 
k4ByteSize : k8ByteSize; + } + } else if (regSize < k4ByteSize) { + regSize = k4ByteSize; /* 32-bit */ + } + regno_t vRegNO; + Operand *result = nullptr; + if (parent.GetOpCode() == OP_eval) { + /* regSize << 3, that is regSize * 8, change bytes to bits */ + result = &AArch64RegOperand::GetZeroRegister(regSize << 3); + } else { + vRegNO = NewVReg(regType, regSize); + result = &CreateVirtualRegisterOperand(vRegNO); + } + + PrimType destType = pointedType->GetPrimType(); + + uint32 bitSize = 0; + if ((pointedType->GetKind() == kTypeStructIncomplete) || (pointedType->GetKind() == kTypeClassIncomplete) || + (pointedType->GetKind() == kTypeInterfaceIncomplete)) { + bitSize = GetPrimTypeBitSize(expr.GetPrimType()); + maple::LogInfo::MapleLogger(kLlErr) << "Warning: objsize is zero! \n"; + } else { + if (pointedType->IsStructType()) { + MIRStructType *structType = static_cast(pointedType); + if (expr.GetFieldID()) { + /* size << 3, that is size * 8, change bytes to bits */ + bitSize = structType->GetSize() << 3; + } else { + destType = GetDestTypeFromAggSize(bitSize); + } + } else { + bitSize = GetPrimTypeBitSize(destType); + } + if (regType == kRegTyFloat) { + destType = expr.GetPrimType(); + bitSize = GetPrimTypeBitSize(destType); + } else if (destType == PTY_agg) { + switch (bitSize) { + case k8BitSize: + destType = PTY_u8; + break; + case k16BitSize: + destType = PTY_u16; + break; + case k32BitSize: + destType = PTY_u32; + break; + case k64BitSize: + destType = PTY_u64; + break; + default: + CHECK_FATAL(false, "SelectIread: aggregate of wrong size"); + } + } + } + + MemOperand *memOpnd = &CreateMemOpnd(destType, expr, *expr.Opnd(0), offset, memOrd); + if (aggParamReg != nullptr) { + isAggParamInReg = false; + return aggParamReg; + } + if (isVolLoad && (static_cast(memOpnd)->GetAddrMode() == AArch64MemOperand::kAddrModeBOi)) { + memOrd = AArch64isa::kMoAcquire; + isVolLoad = false; + } + + if ((memOpnd->GetMemVaryType() == kNotVary) && + IsImmediateOffsetOutOfRange(*static_cast(memOpnd), bitSize)) { + memOpnd = &SplitOffsetWithAddInstruction(*static_cast(memOpnd), bitSize); + } + + if (memOrd == AArch64isa::kMoNone) { + MOperator mOp = PickLdInsn(bitSize, destType); + Insn &insn = GetCG()->BuildInstruction(mOp, *result, *memOpnd); + if (parent.GetOpCode() == OP_eval && result->IsRegister() && + static_cast(result)->IsZeroRegister()) { + insn.SetComment("null-check"); + } + GetCurBB()->AppendInsn(insn); + + if (parent.op != OP_eval) { + const AArch64MD *md = &AArch64CG::kMd[insn.GetMachineOpcode()]; + OpndProp *prop = md->GetOperand(0); + if ((static_cast(prop)->GetSize()) < insn.GetOperand(0).GetSize()) { + switch (destType) { + case PTY_i8: + mOp = MOP_xsxtb64; + break; + case PTY_i16: + mOp = MOP_xsxth64; + break; + case PTY_i32: + mOp = MOP_xsxtw64; + break; + case PTY_u8: + mOp = MOP_xuxtb32; + break; + case PTY_u16: + mOp = MOP_xuxth32; + break; + case PTY_u32: + mOp = MOP_xuxtw64; + break; + default: + break; + } + GetCurBB()->AppendInsn(cg->BuildInstruction( + mOp, insn.GetOperand(0), insn.GetOperand(0))); + } + } + } else { + AArch64CGFunc::SelectLoadAcquire(*result, destType, *memOpnd, destType, memOrd, false); + } + GetCurBB()->GetLastInsn()->MarkAsAccessRefField(isRefField); + return result; +} + +Operand *AArch64CGFunc::SelectIntConst(MIRIntConst &intConst) { + return &CreateImmOperand(intConst.GetValue(), GetPrimTypeSize(intConst.GetType().GetPrimType()) * kBitsPerByte, + false); +} + +template +Operand *SelectLiteral(T *c, MIRFunction *func, uint32 labelIdx, AArch64CGFunc 
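+  /*
+   * Sketch of what the literal-selection template below produces (illustrative,
+   * the label name depends on the enclosing function and label index): a float or
+   * double constant is emitted as a local pstatic symbol ".LB_<func><labelIdx>" in
+   * the literal pool and read back through a memory operand, typically a literal
+   * load such as
+   *   ldr s0, .LB_foo3
+   * while +0.0 / -0.0 is special-cased to the FP zero operand and needs no pool entry.
+   */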
*cgFunc) { + MIRSymbol *st = func->GetSymTab()->CreateSymbol(kScopeLocal); + std::string lblStr(".LB_"); + MIRSymbol *funcSt = GlobalTables::GetGsymTable().GetSymbolFromStidx(func->GetStIdx().Idx()); + std::string funcName = funcSt->GetName(); + lblStr.append(funcName).append(std::to_string(labelIdx)); + st->SetNameStrIdx(lblStr); + st->SetStorageClass(kScPstatic); + st->SetSKind(kStConst); + st->SetKonst(c); + PrimType primType = c->GetType().GetPrimType(); + st->SetTyIdx(TyIdx(primType)); + uint32 typeBitSize = GetPrimTypeBitSize(primType); + + if (T::GetPrimType() == PTY_f32) { + return (fabs(c->GetValue()) < std::numeric_limits::denorm_min()) + ? static_cast(&cgFunc->GetOrCreateFpZeroOperand(typeBitSize)) + : static_cast(&cgFunc->GetOrCreateMemOpnd(*st, 0, typeBitSize)); + } else if (T::GetPrimType() == PTY_f64) { + return (fabs(c->GetValue()) < std::numeric_limits::denorm_min()) + ? static_cast(&cgFunc->GetOrCreateFpZeroOperand(typeBitSize)) + : static_cast(&cgFunc->GetOrCreateMemOpnd(*st, 0, typeBitSize)); + } else { + CHECK_FATAL(false, "Unsupported const type"); + } + return nullptr; +} + +Operand *AArch64CGFunc::SelectFloatConst(MIRFloatConst &floatConst) { + uint32 labelIdxTmp = GetLabelIdx(); + Operand *result = SelectLiteral(&floatConst, &GetFunction(), labelIdxTmp++, this); + SetLabelIdx(labelIdxTmp); + return result; +} + +Operand *AArch64CGFunc::SelectDoubleConst(MIRDoubleConst &doubleConst) { + uint32 labelIdxTmp = GetLabelIdx(); + Operand *result = SelectLiteral(&doubleConst, &GetFunction(), labelIdxTmp++, this); + SetLabelIdx(labelIdxTmp); + return result; +} + +template +Operand *SelectStrLiteral(T &c, AArch64CGFunc &cgFunc) { + std::string labelStr; + if (c.GetKind() == kConstStrConst) { + labelStr.append(".LUstr_"); + } else if (c.GetKind() == kConstStr16Const) { + labelStr.append(".LUstr16_"); + } else { + CHECK_FATAL(false, "Unsupported literal type"); + } + labelStr.append(std::to_string(c.GetValue())); + + MIRSymbol *labelSym = GlobalTables::GetGsymTable().GetSymbolFromStrIdx( + GlobalTables::GetStrTable().GetStrIdxFromName(labelStr)); + if (labelSym == nullptr) { + labelSym = cgFunc.GetMirModule().GetMIRBuilder()->CreateGlobalDecl(labelStr, c.GetType()); + labelSym->SetStorageClass(kScFstatic); + labelSym->SetSKind(kStConst); + /* c may be local, we need a global node here */ + labelSym->SetKonst(cgFunc.NewMirConst(c)); + } + + if (c.GetPrimType() == PTY_ptr) { + StImmOperand &stOpnd = cgFunc.CreateStImmOperand(*labelSym, 0, 0); + RegOperand &addrOpnd = cgFunc.CreateRegisterOperandOfType(PTY_a64); + cgFunc.SelectAddrof(addrOpnd, stOpnd); + return &addrOpnd; + } + CHECK_FATAL(false, "Unsupported const string type"); + return nullptr; +} + +Operand *AArch64CGFunc::SelectStrConst(MIRStrConst &strConst) { + return SelectStrLiteral(strConst, *this); +} + +Operand *AArch64CGFunc::SelectStr16Const(MIRStr16Const &str16Const) { + return SelectStrLiteral(str16Const, *this); +} + +static inline void AppendInstructionTo(Insn &i, CGFunc &f) { + f.GetCurBB()->AppendInsn(i); +} + +/* + * Returns the number of leading 0-bits in x, starting at the most significant bit position. + * If x is 0, the result is -1. + */ +static int32 GetHead0BitNum(int64 val) { + uint32 bitNum = 0; + for (; bitNum < k64BitSize; bitNum++) { + if ((0x8000000000000000ULL >> static_cast(bitNum)) & static_cast(val)) { + break; + } + } + if (bitNum == k64BitSize) { + return -1; + } + return bitNum; +} + +/* + * Returns the number of trailing 0-bits in x, starting at the least significant bit position. 
+ * If x is 0, the result is -1. + */ +static int32 GetTail0BitNum(int64 val) { + uint32 bitNum = 0; + for (; bitNum < k64BitSize; bitNum++) { + if ((static_cast(1) << static_cast(bitNum)) & static_cast(val)) { + break; + } + } + if (bitNum == k64BitSize) { + return -1; + } + return bitNum; +} + +/* + * If the input integer is power of 2, return log2(input) + * else return -1 + */ +static inline int32 IsPowerOf2(int64 val) { + if (__builtin_popcountll(val) == 1) { + return __builtin_ffsll(val) - 1; + } + return -1; +} + +MOperator AArch64CGFunc::PickJmpInsn(Opcode brOp, Opcode cmpOp, bool isFloat, bool isSigned) { + switch (cmpOp) { + case OP_ne: + return (brOp == OP_brtrue) ? MOP_bne : MOP_beq; + case OP_eq: + return (brOp == OP_brtrue) ? MOP_beq : MOP_bne; + case OP_lt: + return (brOp == OP_brtrue) ? (isSigned ? MOP_blt : MOP_blo) + : (isFloat ? MOP_bpl : (isSigned ? MOP_bge : MOP_bhs)); + case OP_le: + return (brOp == OP_brtrue) ? (isSigned ? MOP_ble : MOP_bls) + : (isFloat ? MOP_bhi : (isSigned ? MOP_bgt : MOP_bhi)); + case OP_gt: + return (brOp == OP_brtrue) ? (isFloat ? MOP_bhi : (isSigned ? MOP_bgt : MOP_bhi)) + : (isSigned ? MOP_ble : MOP_bls); + case OP_ge: + return (brOp == OP_brtrue) ? (isFloat ? MOP_bpl : (isSigned ? MOP_bge : MOP_bhs)) + : (isSigned ? MOP_blt : MOP_blo); + default: + CHECK_FATAL(false, "PickJmpInsn error"); + } +} + +bool AArch64CGFunc::GenerateCompareWithZeroInstruction(Opcode jmpOp, Opcode cmpOp, bool is64Bits, + LabelOperand &targetOpnd, Operand &opnd0) { + bool finish = true; + MOperator mOpCode = MOP_undef; + switch (cmpOp) { + case OP_ne: { + if (jmpOp == OP_brtrue) { + mOpCode = is64Bits ? MOP_xcbnz : MOP_wcbnz; + } else { + mOpCode = is64Bits ? MOP_xcbz : MOP_wcbz; + } + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOpCode, opnd0, targetOpnd)); + break; + } + case OP_eq: { + if (jmpOp == OP_brtrue) { + mOpCode = is64Bits ? MOP_xcbz : MOP_wcbz; + } else { + mOpCode = is64Bits ? MOP_xcbnz : MOP_wcbnz; + } + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOpCode, opnd0, targetOpnd)); + break; + } + /* + * TBZ/TBNZ instruction have a range of +/-32KB, need to check if the jump target is reachable in a later + * phase. If the branch target is not reachable, then we change tbz/tbnz into combination of ubfx and + * cbz/cbnz, which will clobber one extra register. With LSRA under O2, we can use of the reserved registers + * for that purpose. + */ + case OP_lt: { + ImmOperand &signBit = CreateImmOperand(is64Bits ? kHighestBitOf64Bits : kHighestBitOf32Bits, k8BitSize, false); + if (jmpOp == OP_brtrue) { + mOpCode = is64Bits ? MOP_xtbnz : MOP_wtbnz; + } else { + mOpCode = is64Bits ? MOP_xtbz : MOP_wtbz; + } + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOpCode, opnd0, signBit, targetOpnd)); + break; + } + case OP_ge: { + ImmOperand &signBit = CreateImmOperand(is64Bits ? kHighestBitOf64Bits : kHighestBitOf32Bits, k8BitSize, false); + if (jmpOp == OP_brtrue) { + mOpCode = is64Bits ? MOP_xtbz : MOP_wtbz; + } else { + mOpCode = is64Bits ? 
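+      /*
+       * Illustrative lowerings of the compare-with-zero cases handled here
+       * (register names and labels are arbitrary):
+       *   brtrue (ne i64 x, 0)  ->  cbnz x0, .L1
+       *   brfalse(eq i32 x, 0)  ->  cbnz w0, .L1
+       *   brtrue (lt i64 x, 0)  ->  tbnz x0, #63, .L1   (test the sign bit)
+       *   brfalse(ge i32 x, 0)  ->  tbnz w0, #31, .L1
+       */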
MOP_xtbnz : MOP_wtbnz; + } + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOpCode, opnd0, signBit, targetOpnd)); + break; + } + default: + finish = false; + break; + } + return finish; +} + +void AArch64CGFunc::SelectIgoto(Operand *opnd0) { + Operand *srcOpnd = opnd0; + if (opnd0->GetKind() == Operand::kOpdMem) { + Operand *dst = &CreateVirtualRegisterOperand(NewVReg(kRegTyInt, k8ByteSize)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xldr, *dst, *opnd0)); + srcOpnd = dst; + } + GetCurBB()->SetKind(BB::kBBIgoto); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xbr, *srcOpnd)); +} + +void AArch64CGFunc::SelectCondGoto(LabelOperand &targetOpnd, Opcode jmpOp, Opcode cmpOp, Operand &origOpnd0, + Operand &origOpnd1, PrimType primType) { + Operand *opnd0 = &origOpnd0; + Operand *opnd1 = &origOpnd1; + opnd0 = &LoadIntoRegister(origOpnd0, primType); + + bool is64Bits = GetPrimTypeBitSize(primType) == k64BitSize; + bool isFloat = IsPrimitiveFloat(primType); + Operand &rflag = GetOrCreateRflag(); + if (isFloat) { + opnd1 = &LoadIntoRegister(origOpnd1, primType); + MOperator mOp = is64Bits ? MOP_dcmperr : ((GetPrimTypeBitSize(primType) == k32BitSize) ? MOP_scmperr : MOP_hcmperr); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, rflag, *opnd0, *opnd1)); + } else { + bool isImm = ((origOpnd1.GetKind() == Operand::kOpdImmediate) || (origOpnd1.GetKind() == Operand::kOpdOffset)); + if ((origOpnd1.GetKind() != Operand::kOpdRegister) && !isImm) { + opnd1 = &SelectCopy(origOpnd1, primType, primType); + } + MOperator mOp = is64Bits ? MOP_xcmprr : MOP_wcmprr; + + if (isImm) { + /* Special cases, i.e., comparing with zero + * Do not perform optimization for C, unlike Java which has no unsigned int. + */ + if (static_cast(opnd1)->IsZero() && (Globals::GetInstance()->GetOptimLevel() > 0) && + ((mirModule.GetSrcLang() != kSrcLangC) || ((primType != PTY_u64) && (primType != PTY_u32)))) { + bool finish = GenerateCompareWithZeroInstruction(jmpOp, cmpOp, is64Bits, targetOpnd, *opnd0); + if (finish) { + return; + } + } + + /* + * aarch64 assembly takes up to 24-bits immediate, generating + * either cmp or cmp with shift 12 encoding + */ + ImmOperand *immOpnd = static_cast(opnd1); + if (immOpnd->IsInBitSize(kMaxImmVal12Bits, 0) || + immOpnd->IsInBitSize(kMaxImmVal12Bits, kMaxImmVal12Bits)) { + mOp = is64Bits ? 
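+        /*
+         * Examples for the immediate check above (values illustrative):
+         *   cmp x0, #4095     - fits the low 12 bits directly
+         *   cmp x0, #0x5000   - low 12 bits are zero, encoded as #5, LSL #12
+         *   #0x12345          - fits neither form, so the constant is moved into a
+         *                       register first and a register-register cmp is used
+         */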
MOP_xcmpri : MOP_wcmpri; + } else { + opnd1 = &SelectCopy(*opnd1, primType, primType); + } + } + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, rflag, *opnd0, *opnd1)); + } + + MOperator jmpOperator = PickJmpInsn(jmpOp, cmpOp, isFloat, IsSignedInteger(primType)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(jmpOperator, rflag, targetOpnd)); +} + +/* + * brtrue @label0 (ge u8 i32 ( + * cmp i32 i64 (dread i64 %Reg2_J, dread i64 %Reg4_J), + * constval i32 0)) + * ===> + * cmp r1, r2 + * bge Cond, label0 + */ +void AArch64CGFunc::SelectCondSpecialCase1(CondGotoNode &stmt, BaseNode &expr) { + ASSERT(expr.GetOpCode() == OP_cmp, "unexpect opcode"); + Operand *opnd0 = HandleExpr(expr, *expr.Opnd(0)); + Operand *opnd1 = HandleExpr(expr, *expr.Opnd(1)); + CompareNode *node = static_cast(&expr); + bool isFloat = IsPrimitiveFloat(node->GetOpndType()); + opnd0 = &LoadIntoRegister(*opnd0, node->GetOpndType()); + /* + * most of FP constants are passed as AArch64MemOperand + * except 0.0 which is passed as kOpdFPZeroImmediate + */ + Operand::OperandType opnd1Type = opnd1->GetKind(); + if ((opnd1Type != Operand::kOpdImmediate) && (opnd1Type != Operand::kOpdFPZeroImmediate) && + (opnd1Type != Operand::kOpdOffset)) { + opnd1 = &LoadIntoRegister(*opnd1, node->GetOpndType()); + } + SelectAArch64Cmp(*opnd0, *opnd1, !isFloat, GetPrimTypeBitSize(node->GetOpndType())); + /* handle condgoto now. */ + LabelIdx labelIdx = stmt.GetOffset(); + BaseNode *condNode = stmt.Opnd(0); + LabelOperand &targetOpnd = GetOrCreateLabelOperand(labelIdx); + Opcode cmpOp = condNode->GetOpCode(); + PrimType pType = static_cast(condNode)->GetOpndType(); + isFloat = IsPrimitiveFloat(pType); + Operand &rflag = GetOrCreateRflag(); + MOperator jmpOp = PickJmpInsn(stmt.GetOpCode(), cmpOp, isFloat, IsSignedInteger(pType)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(jmpOp, rflag, targetOpnd)); +} + +/* + * Special case: + * brfalse(ge (cmpg (op0, op1), 0) ==> + * fcmp op1, op2 + * blo + */ +void AArch64CGFunc::SelectCondSpecialCase2(const CondGotoNode &stmt, BaseNode &expr) { + auto &cmpNode = static_cast(expr); + Operand *opnd0 = HandleExpr(cmpNode, *cmpNode.Opnd(0)); + Operand *opnd1 = HandleExpr(cmpNode, *cmpNode.Opnd(1)); + PrimType operandType = cmpNode.GetOpndType(); + opnd0 = opnd0->IsRegister() ? static_cast(opnd0) + : &SelectCopy(*opnd0, operandType, operandType); + Operand::OperandType opnd1Type = opnd1->GetKind(); + if ((opnd1Type != Operand::kOpdImmediate) && (opnd1Type != Operand::kOpdFPZeroImmediate) && + (opnd1Type != Operand::kOpdOffset)) { + opnd1 = opnd1->IsRegister() ? 
static_cast(opnd1) + : &SelectCopy(*opnd1, operandType, operandType); + } +#ifdef DEBUG + bool isFloat = IsPrimitiveFloat(operandType); + if (!isFloat) { + ASSERT(false, "incorrect operand types"); + } +#endif + SelectTargetFPCmpQuiet(*opnd0, *opnd1, GetPrimTypeBitSize(operandType)); + Operand &rFlag = GetOrCreateRflag(); + LabelIdx tempLabelIdx = stmt.GetOffset(); + LabelOperand &targetOpnd = GetOrCreateLabelOperand(tempLabelIdx); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_blo, rFlag, targetOpnd)); +} + +void AArch64CGFunc::SelectCondGoto(CondGotoNode &stmt, Operand &opnd0, Operand &opnd1) { + /* + * handle brfalse/brtrue op, opnd0 can be a compare node or non-compare node + * such as a dread for example + */ + LabelIdx labelIdx = stmt.GetOffset(); + BaseNode *condNode = stmt.Opnd(0); + LabelOperand &targetOpnd = GetOrCreateLabelOperand(labelIdx); + Opcode cmpOp; + + if (opnd0.IsRegister() && (static_cast(&opnd0)->GetValidBitsNum() == 1) && + (condNode->GetOpCode() == OP_lior)) { + ImmOperand &condBit = CreateImmOperand(0, k8BitSize, false); + if (stmt.GetOpCode() == OP_brtrue) { + GetCurBB()->AppendInsn( + GetCG()->BuildInstruction(MOP_wtbnz, static_cast(opnd0), condBit, targetOpnd)); + } else { + GetCurBB()->AppendInsn( + GetCG()->BuildInstruction(MOP_wtbz, static_cast(opnd0), condBit, targetOpnd)); + } + return; + } + + PrimType pType; + if (kOpcodeInfo.IsCompare(condNode->GetOpCode())) { + cmpOp = condNode->GetOpCode(); + pType = static_cast(condNode)->GetOpndType(); + } else { + /* not a compare node; dread for example, take its pType */ + cmpOp = OP_ne; + pType = condNode->GetPrimType(); + } + + SelectCondGoto(targetOpnd, stmt.GetOpCode(), cmpOp, opnd0, opnd1, pType); +} + +void AArch64CGFunc::SelectGoto(GotoNode &stmt) { + Operand &targetOpnd = GetOrCreateLabelOperand(stmt.GetOffset()); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xuncond, targetOpnd)); +} + +Operand *AArch64CGFunc::SelectAdd(BinaryNode &node, Operand &opnd0, Operand &opnd1) { + PrimType dtype = node.GetPrimType(); + bool isSigned = IsSignedInteger(dtype); + uint32 dsize = GetPrimTypeBitSize(dtype); + bool is64Bits = (dsize == k64BitSize); + bool isFloat = IsPrimitiveFloat(dtype); + /* promoted type */ + PrimType primType = + isFloat ? dtype : ((is64Bits ? (isSigned ? PTY_i64 : PTY_u64) : (isSigned ? PTY_i32 : PTY_u32))); + RegOperand &resOpnd = CreateRegisterOperandOfType(primType); + SelectAdd(resOpnd, opnd0, opnd1, primType); + return &resOpnd; +} + +void AArch64CGFunc::SelectAdd(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) { + Operand::OperandType opnd0Type = opnd0.GetKind(); + Operand::OperandType opnd1Type = opnd1.GetKind(); + uint32 dsize = GetPrimTypeBitSize(primType); + bool is64Bits = (dsize == k64BitSize); + if (opnd0Type != Operand::kOpdRegister) { + /* add #imm, #imm */ + if (opnd1Type != Operand::kOpdRegister) { + SelectAdd(resOpnd, SelectCopy(opnd0, primType, primType), opnd1, primType); + return; + } + /* add #imm, reg */ + SelectAdd(resOpnd, opnd1, opnd0, primType); /* commutative */ + return; + } + /* add reg, reg */ + if (opnd1Type == Operand::kOpdRegister) { + ASSERT(IsPrimitiveFloat(primType) || IsPrimitiveInteger(primType), "NYI add"); + MOperator mOp = IsPrimitiveFloat(primType) ? + (is64Bits ? MOP_dadd : MOP_sadd) : (is64Bits ? 
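+    /*
+     * Note on the 1-bit condition case in SelectCondGoto above: when the condition
+     * operand is a single-valid-bit result of a logical-or, the branch tests bit 0
+     * directly, e.g. (illustrative)
+     *   brtrue  ->  tbnz w0, #0, .L1
+     *   brfalse ->  tbz  w0, #0, .L1
+     */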
MOP_xaddrrr : MOP_waddrrr); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, resOpnd, opnd0, opnd1)); + return; + } else if (!((opnd1Type == Operand::kOpdImmediate) || (opnd1Type == Operand::kOpdOffset))) { + /* add reg, otheregType */ + SelectAdd(resOpnd, opnd0, SelectCopy(opnd1, primType, primType), primType); + return; + } else { + /* add reg, #imm */ + AArch64ImmOperand *immOpnd = static_cast(&opnd1); + if (immOpnd->IsNegative()) { + immOpnd->Negate(); + SelectSub(resOpnd, opnd0, *immOpnd, primType); + return; + } + if (immOpnd->IsInBitSize(kMaxImmVal24Bits, 0)) { + /* + * ADD Wd|WSP, Wn|WSP, #imm{, shift} ; 32-bit general registers + * ADD Xd|SP, Xn|SP, #imm{, shift} ; 64-bit general registers + * imm : 0 ~ 4095, shift: none, LSL #0, or LSL #12 + * aarch64 assembly takes up to 24-bits, if the lower 12 bits is all 0 + */ + MOperator mOpCode = MOP_undef; + Operand *newOpnd0 = &opnd0; + if (!(immOpnd->IsInBitSize(kMaxImmVal12Bits, 0) || + immOpnd->IsInBitSize(kMaxImmVal12Bits, kMaxImmVal12Bits))) { + /* process higher 12 bits */ + ImmOperand &immOpnd2 = + CreateImmOperand(static_cast(static_cast(immOpnd->GetValue()) >> kMaxImmVal12Bits), + immOpnd->GetSize(), immOpnd->IsSignedValue()); + mOpCode = is64Bits ? MOP_xaddrri24 : MOP_waddrri24; + Insn &newInsn = GetCG()->BuildInstruction(mOpCode, resOpnd, opnd0, immOpnd2, addSubLslOperand); + GetCurBB()->AppendInsn(newInsn); + immOpnd->ModuloByPow2(static_cast(kMaxImmVal12Bits)); + newOpnd0 = &resOpnd; + } + /* process lower 12 bits */ + mOpCode = is64Bits ? MOP_xaddrri12 : MOP_waddrri12; + Insn &newInsn = GetCG()->BuildInstruction(mOpCode, resOpnd, *newOpnd0, *immOpnd); + GetCurBB()->AppendInsn(newInsn); + return; + } + /* load into register */ + int64 immVal = immOpnd->GetValue(); + int32 tail0bitNum = GetTail0BitNum(immVal); + int32 head0bitNum = GetHead0BitNum(immVal); + const int32 bitNum = k64BitSize - head0bitNum - tail0bitNum; + RegOperand ®Opnd = CreateRegisterOperandOfType(primType); + if (isAfterRegAlloc) { + RegType regty = GetRegTyFromPrimTy(primType); + uint32 bytelen = GetPrimTypeSize(primType); + regOpnd = GetOrCreatePhysicalRegisterOperand((AArch64reg)(R16), bytelen, regty); + } + + if (bitNum <= k16ValidBit) { + int64 newImm = (static_cast(immVal) >> static_cast(tail0bitNum)) & 0xFFFF; + AArch64ImmOperand &immOpnd1 = CreateImmOperand(newImm, k16BitSize, false); + SelectCopyImm(regOpnd, immOpnd1, primType); + uint32 mopBadd = is64Bits ? MOP_xaddrrrs : MOP_waddrrrs; + int32 bitLen = is64Bits ? kBitLenOfShift64Bits : kBitLenOfShift32Bits; + BitShiftOperand &bitShiftOpnd = CreateBitShiftOperand(BitShiftOperand::kLSL, tail0bitNum, bitLen); + Insn &newInsn = GetCG()->BuildInstruction(mopBadd, resOpnd, opnd0, regOpnd, bitShiftOpnd); + GetCurBB()->AppendInsn(newInsn); + return; + } + + SelectCopyImm(regOpnd, *immOpnd, primType); + MOperator mOpCode = is64Bits ? 
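+    /*
+     * Worked instance of the 24-bit immediate split above (illustrative registers),
+     * adding #0x12345:
+     *   add x0, x1, #0x12, LSL #12   // higher 12 bits
+     *   add x0, x0, #0x345           // lower 12 bits
+     * Constants outside the 24-bit range are instead materialized into a scratch
+     * register (the movz / shifted-register path above) and added register-register.
+     */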
MOP_xaddrrr : MOP_waddrrr; + Insn &newInsn = GetCG()->BuildInstruction(mOpCode, resOpnd, opnd0, regOpnd); + GetCurBB()->AppendInsn(newInsn); + } +} + +Operand &AArch64CGFunc::SelectCGArrayElemAdd(BinaryNode &node) { + BaseNode *opnd0 = node.Opnd(0); + BaseNode *opnd1 = node.Opnd(1); + ASSERT(opnd1->GetOpCode() == OP_constval, "Internal error, opnd1->op should be OP_constval."); + + switch (opnd0->op) { + case OP_regread: { + RegreadNode *regreadNode = static_cast(opnd0); + return *SelectRegread(*regreadNode); + } + case OP_addrof: { + AddrofNode *addrofNode = static_cast(opnd0); + MIRSymbol &symbol = *mirModule.CurFunction()->GetLocalOrGlobalSymbol(addrofNode->GetStIdx()); + ASSERT(addrofNode->GetFieldID() == 0, "For debug SelectCGArrayElemAdd."); + + PrimType primType = addrofNode->GetPrimType(); + regno_t vRegNo = NewVReg(kRegTyInt, GetPrimTypeSize(primType)); + Operand &result = CreateVirtualRegisterOperand(vRegNo); + + // OP_constval + ConstvalNode *constvalNode = static_cast(opnd1); + MIRConst *mirConst = constvalNode->GetConstVal(); + MIRIntConst *mirIntConst = static_cast(mirConst); + SelectAddrof(result, CreateStImmOperand(symbol, mirIntConst->GetValue(), 0)); + + return result; + } + default: + CHECK_FATAL(0, "Internal error, cannot handle opnd0."); + } +} + +void AArch64CGFunc::SelectSub(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) { + Operand::OperandType opnd1Type = opnd1.GetKind(); + uint32 dsize = GetPrimTypeBitSize(primType); + bool is64Bits = (dsize == k64BitSize); + bool isFloat = IsPrimitiveFloat(primType); + Operand *opnd0Bak = &LoadIntoRegister(opnd0, primType); + if (opnd1Type == Operand::kOpdRegister) { + MOperator mOp = isFloat ? (is64Bits ? MOP_dsub : MOP_ssub) : (is64Bits ? MOP_xsubrrr : MOP_wsubrrr); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, resOpnd, *opnd0Bak, opnd1)); + return; + } + + if ((opnd1Type != Operand::kOpdImmediate) && (opnd1Type != Operand::kOpdOffset)) { + SelectSub(resOpnd, *opnd0Bak, SelectCopy(opnd1, primType, primType), primType); + return; + } + + AArch64ImmOperand *immOpnd = static_cast(&opnd1); + if (immOpnd->IsNegative()) { + immOpnd->Negate(); + SelectAdd(resOpnd, *opnd0Bak, *immOpnd, primType); + return; + } + + if (immOpnd->IsInBitSize(kMaxImmVal24Bits, 0)) { + /* + * SUB Wd|WSP, Wn|WSP, #imm{, shift} ; 32-bit general registers + * SUB Xd|SP, Xn|SP, #imm{, shift} ; 64-bit general registers + * imm : 0 ~ 4095, shift: none, LSL #0, or LSL #12 + * aarch64 assembly takes up to 24-bits, if the lower 12 bits is all 0 + */ + MOperator mOpCode = MOP_undef; + if (!(immOpnd->IsInBitSize(kMaxImmVal12Bits, 0) || + immOpnd->IsInBitSize(kMaxImmVal12Bits, kMaxImmVal12Bits))) { + /* process higher 12 bits */ + ImmOperand &immOpnd2 = + CreateImmOperand(static_cast(static_cast(immOpnd->GetValue()) >> kMaxImmVal12Bits), + immOpnd->GetSize(), immOpnd->IsSignedValue()); + mOpCode = is64Bits ? MOP_xsubrri24 : MOP_wsubrri24; + Insn &newInsn = GetCG()->BuildInstruction(mOpCode, resOpnd, *opnd0Bak, immOpnd2, addSubLslOperand); + GetCurBB()->AppendInsn(newInsn); + immOpnd->ModuloByPow2(static_cast(kMaxImmVal12Bits)); + opnd0Bak = &resOpnd; + } + /* process lower 12 bits */ + mOpCode = is64Bits ? 
MOP_xsubrri12 : MOP_wsubrri12; + Insn &newInsn = GetCG()->BuildInstruction(mOpCode, resOpnd, *opnd0Bak, *immOpnd); + GetCurBB()->AppendInsn(newInsn); + return; + } + + /* load into register */ + int64 immVal = immOpnd->GetValue(); + int32 tail0bitNum = GetTail0BitNum(immVal); + int32 head0bitNum = GetHead0BitNum(immVal); + const int32 bitNum = k64BitSize - head0bitNum - tail0bitNum; + RegOperand ®Opnd = CreateRegisterOperandOfType(primType); + if (isAfterRegAlloc) { + RegType regty = GetRegTyFromPrimTy(primType); + uint32 bytelen = GetPrimTypeSize(primType); + regOpnd = GetOrCreatePhysicalRegisterOperand((AArch64reg)(R16), bytelen, regty); + } + + if (bitNum <= k16ValidBit) { + int64 newImm = (static_cast(immVal) >> static_cast(tail0bitNum)) & 0xFFFF; + AArch64ImmOperand &immOpnd1 = CreateImmOperand(newImm, k16BitSize, false); + SelectCopyImm(regOpnd, immOpnd1, primType); + uint32 mopBsub = is64Bits ? MOP_xsubrrrs : MOP_wsubrrrs; + int32 bitLen = is64Bits ? kBitLenOfShift64Bits : kBitLenOfShift32Bits; + BitShiftOperand &bitShiftOpnd = CreateBitShiftOperand(BitShiftOperand::kLSL, tail0bitNum, bitLen); + GetCurBB()->AppendInsn( + GetCG()->BuildInstruction(mopBsub, resOpnd, *opnd0Bak, regOpnd, bitShiftOpnd)); + return; + } + + SelectCopyImm(regOpnd, *immOpnd, primType); + MOperator mOpCode = is64Bits ? MOP_xsubrrr : MOP_wsubrrr; + Insn &newInsn = GetCG()->BuildInstruction(mOpCode, resOpnd, *opnd0Bak, regOpnd); + GetCurBB()->AppendInsn(newInsn); +} + +Operand *AArch64CGFunc::SelectSub(BinaryNode &node, Operand &opnd0, Operand &opnd1) { + PrimType dtype = node.GetPrimType(); + bool isSigned = IsSignedInteger(dtype); + uint32 dsize = GetPrimTypeBitSize(dtype); + bool is64Bits = (dsize == k64BitSize); + bool isFloat = IsPrimitiveFloat(dtype); + /* promoted type */ + PrimType primType = + isFloat ? dtype : ((is64Bits ? (isSigned ? PTY_i64 : PTY_u64) : (isSigned ? PTY_i32 : PTY_u32))); + RegOperand &resOpnd = CreateRegisterOperandOfType(primType); + SelectSub(resOpnd, opnd0, opnd1, primType); + return &resOpnd; +} + +Operand *AArch64CGFunc::SelectMpy(BinaryNode &node, Operand &opnd0, Operand &opnd1) { + PrimType dtype = node.GetPrimType(); + bool isSigned = IsSignedInteger(dtype); + uint32 dsize = GetPrimTypeBitSize(dtype); + bool is64Bits = (dsize == k64BitSize); + bool isFloat = IsPrimitiveFloat(dtype); + /* promoted type */ + PrimType primType = + isFloat ? dtype : ((is64Bits ? (isSigned ? PTY_i64 : PTY_u64) : (isSigned ? PTY_i32 : PTY_u32))); + RegOperand &resOpnd = CreateRegisterOperandOfType(primType); + SelectMpy(resOpnd, opnd0, opnd1, primType); + return &resOpnd; +} + +void AArch64CGFunc::SelectMpy(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) { + Operand::OperandType opnd0Type = opnd0.GetKind(); + Operand::OperandType opnd1Type = opnd1.GetKind(); + uint32 dsize = GetPrimTypeBitSize(primType); + bool is64Bits = (dsize == k64BitSize); + + if (((opnd0Type == Operand::kOpdImmediate) || (opnd0Type == Operand::kOpdOffset) || + (opnd1Type == Operand::kOpdImmediate) || (opnd1Type == Operand::kOpdOffset)) && + IsPrimitiveInteger(primType)) { + ImmOperand *imm = + ((opnd0Type == Operand::kOpdImmediate) || (opnd0Type == Operand::kOpdOffset)) ? static_cast(&opnd0) + : static_cast(&opnd1); + Operand *otherOp = ((opnd0Type == Operand::kOpdImmediate) || (opnd0Type == Operand::kOpdOffset)) ? 
&opnd1 : &opnd0; + int64 immValue = llabs(imm->GetValue()); + if (immValue != 0 && (static_cast(immValue) & (static_cast(immValue) - 1)) == 0) { + /* immValue is 1 << n */ + if (otherOp->GetKind() != Operand::kOpdRegister) { + otherOp = &SelectCopy(*otherOp, primType, primType); + } + AArch64ImmOperand &shiftNum = CreateImmOperand(__builtin_ffsll(immValue) - 1, dsize, false); + SelectShift(resOpnd, *otherOp, shiftNum, kShiftLeft, primType); + if (imm->GetValue() < 0) { + SelectNeg(resOpnd, resOpnd, primType); + } + + return; + } else if (immValue > 2) { + uint32 zeroNum = __builtin_ffsll(immValue) - 1; + int64 headVal = static_cast(immValue) >> zeroNum; + /* + * if (headVal + 1) & (headVal) == 0, that is (immVal >> zeroNum) + 1 == 1 << n + * otherOp * immVal = (otherOp * (immVal >> zeroNum) * (1 << zeroNum) + * = (otherOp * ((immVal >> zeroNum) + 1) - otherOp) * (1 << zeroNum) + */ + if (((static_cast(headVal) + 1) & static_cast(headVal)) == 0) { + if (otherOp->GetKind() != Operand::kOpdRegister) { + otherOp = &SelectCopy(*otherOp, primType, primType); + } + AArch64ImmOperand &shiftNum1 = CreateImmOperand(__builtin_ffsll(headVal + 1) - 1, dsize, false); + RegOperand &tmpOpnd = CreateRegisterOperandOfType(primType); + SelectShift(tmpOpnd, *otherOp, shiftNum1, kShiftLeft, primType); + SelectSub(resOpnd, tmpOpnd, *otherOp, primType); + AArch64ImmOperand &shiftNum2 = CreateImmOperand(zeroNum, dsize, false); + SelectShift(resOpnd, resOpnd, shiftNum2, kShiftLeft, primType); + if (imm->GetValue() < 0) { + SelectNeg(resOpnd, resOpnd, primType); + } + + return; + } + /* + * if (headVal - 1) & (headVal - 2) == 0, that is (immVal >> zeroNum) - 1 == 1 << n + * otherOp * immVal = (otherOp * (immVal >> zeroNum) * (1 << zeroNum) + * = (otherOp * ((immVal >> zeroNum) - 1) + otherOp) * (1 << zeroNum) + */ + if (((static_cast(headVal) - 1) & (static_cast(headVal) - 2)) == 0) { + if (otherOp->GetKind() != Operand::kOpdRegister) { + otherOp = &SelectCopy(*otherOp, primType, primType); + } + AArch64ImmOperand &shiftNum1 = CreateImmOperand(__builtin_ffsll(headVal - 1) - 1, dsize, false); + RegOperand &tmpOpnd = CreateRegisterOperandOfType(primType); + SelectShift(tmpOpnd, *otherOp, shiftNum1, kShiftLeft, primType); + SelectAdd(resOpnd, tmpOpnd, *otherOp, primType); + AArch64ImmOperand &shiftNum2 = CreateImmOperand(zeroNum, dsize, false); + SelectShift(resOpnd, resOpnd, shiftNum2, kShiftLeft, primType); + if (imm->GetValue() < 0) { + SelectNeg(resOpnd, resOpnd, primType); + } + + return; + } + } + } + + if ((opnd0Type != Operand::kOpdRegister) && (opnd1Type != Operand::kOpdRegister)) { + SelectMpy(resOpnd, SelectCopy(opnd0, primType, primType), opnd1, primType); + } else if ((opnd0Type == Operand::kOpdRegister) && (opnd1Type != Operand::kOpdRegister)) { + SelectMpy(resOpnd, opnd0, SelectCopy(opnd1, primType, primType), primType); + } else if ((opnd0Type != Operand::kOpdRegister) && (opnd1Type == Operand::kOpdRegister)) { + SelectMpy(resOpnd, opnd1, opnd0, primType); + } else { + ASSERT(IsPrimitiveFloat(primType) || IsPrimitiveInteger(primType), "NYI Mpy"); + MOperator mOp = IsPrimitiveFloat(primType) ? (is64Bits ? MOP_xvmuld : MOP_xvmuls) + : (is64Bits ? 
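+    /*
+     * Worked examples of the constant-multiply strength reduction above
+     * (illustrative registers):
+     *   x * 8  :  lsl x0, x1, #3
+     *   x * 12 :  headVal == 3 and 3 + 1 is a power of two, so
+     *             lsl x2, x1, #2 ; sub x2, x2, x1 ; lsl x0, x2, #2   // ((x<<2) - x) << 2
+     *   x * 20 :  headVal == 5 and 5 - 1 is a power of two, so
+     *             lsl x2, x1, #2 ; add x2, x2, x1 ; lsl x0, x2, #2   // ((x<<2) + x) << 2
+     * A negative constant reuses the same sequence followed by a neg.
+     */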
MOP_xmulrrr : MOP_wmulrrr); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, resOpnd, opnd0, opnd1)); + } +} + +void AArch64CGFunc::SelectDiv(Operand &resOpnd, Operand &origOpnd0, Operand &opnd1, PrimType primType) { + Operand &opnd0 = LoadIntoRegister(origOpnd0, primType); + Operand::OperandType opnd0Type = opnd0.GetKind(); + Operand::OperandType opnd1Type = opnd1.GetKind(); + uint32 dsize = GetPrimTypeBitSize(primType); + bool is64Bits = (dsize == k64BitSize); + + if (Globals::GetInstance()->GetOptimLevel() > 0) { + if (((opnd1Type == Operand::kOpdImmediate) || (opnd1Type == Operand::kOpdOffset)) && IsSignedInteger(primType)) { + ImmOperand *imm = static_cast(&opnd1); + int64 immValue = llabs(imm->GetValue()); + if ((immValue != 0) && (static_cast(immValue) & (static_cast(immValue) - 1)) == 0) { + if (immValue == 1) { + if (imm->GetValue() > 0) { + uint32 mOp = is64Bits ? MOP_xmovrr : MOP_wmovrr; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, resOpnd, opnd0)); + } else { + SelectNeg(resOpnd, opnd0, primType); + } + + return; + } + int32 shiftNumber = __builtin_ffsll(immValue) - 1; + AArch64ImmOperand &shiftNum = CreateImmOperand(shiftNumber, dsize, false); + SelectShift(resOpnd, opnd0, CreateImmOperand(dsize - 1, dsize, false), kShiftAright, primType); + uint32 mopBadd = is64Bits ? MOP_xaddrrrs : MOP_waddrrrs; + int32 bitLen = is64Bits ? kBitLenOfShift64Bits : kBitLenOfShift32Bits; + BitShiftOperand &shiftOpnd = CreateBitShiftOperand(BitShiftOperand::kLSR, dsize - shiftNumber, bitLen); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mopBadd, resOpnd, opnd0, resOpnd, shiftOpnd)); + SelectShift(resOpnd, resOpnd, shiftNum, kShiftAright, primType); + if (imm->GetValue() < 0) { + SelectNeg(resOpnd, resOpnd, primType); + } + + return; + } + } else if (((opnd1Type == Operand::kOpdImmediate) || (opnd1Type == Operand::kOpdOffset)) && + IsUnsignedInteger(primType)) { + ImmOperand *imm = static_cast(&opnd1); + if (imm->GetValue() != 0) { + if ((imm->GetValue() > 0) && + ((static_cast(imm->GetValue()) & (static_cast(imm->GetValue()) - 1)) == 0)) { + AArch64ImmOperand &shiftNum = CreateImmOperand(__builtin_ffsll(imm->GetValue()) - 1, dsize, false); + SelectShift(resOpnd, opnd0, shiftNum, kShiftLright, primType); + + return; + } else if (imm->GetValue() < 0) { + SelectAArch64Cmp(opnd0, *imm, true, dsize); + SelectAArch64CSet(resOpnd, GetCondOperand(CC_CS), is64Bits); + + return; + } + } + } + } + + if (opnd0Type != Operand::kOpdRegister) { + SelectDiv(resOpnd, SelectCopy(opnd0, primType, primType), opnd1, primType); + } else if (opnd1Type != Operand::kOpdRegister) { + SelectDiv(resOpnd, opnd0, SelectCopy(opnd1, primType, primType), primType); + } else { + ASSERT(IsPrimitiveFloat(primType) || IsPrimitiveInteger(primType), "NYI Div"); + MOperator mOp = IsPrimitiveFloat(primType) ? (is64Bits ? MOP_ddivrrr : MOP_sdivrrr) + : (IsSignedInteger(primType) ? (is64Bits ? MOP_xsdivrrr : MOP_wsdivrrr) + : (is64Bits ? MOP_xudivrrr : MOP_wudivrrr)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, resOpnd, opnd0, opnd1)); + } +} + +Operand *AArch64CGFunc::SelectDiv(BinaryNode &node, Operand &opnd0, Operand &opnd1) { + PrimType dtype = node.GetPrimType(); + bool isSigned = IsSignedInteger(dtype); + uint32 dsize = GetPrimTypeBitSize(dtype); + bool is64Bits = (dsize == k64BitSize); + bool isFloat = IsPrimitiveFloat(dtype); + /* promoted type */ + PrimType primType = + isFloat ? dtype : ((is64Bits ? (isSigned ? PTY_i64 : PTY_u64) : (isSigned ? 
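+  /*
+   * Worked instance of the signed divide-by-power-of-two path above (illustrative),
+   * x / 8 with a 64-bit dividend in x1:
+   *   asr x2, x1, #63          // all ones when negative, zero otherwise
+   *   add x2, x1, x2, LSR #61  // adds 7 only for a negative dividend
+   *   asr x0, x2, #3
+   * An unsigned divide by a power of two is a single lsr; a "negative" unsigned
+   * divisor degenerates to cmp + cset because the quotient can only be 0 or 1.
+   */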
PTY_i32 : PTY_u32))); + RegOperand &resOpnd = CreateRegisterOperandOfType(primType); + SelectDiv(resOpnd, opnd0, opnd1, primType); + return &resOpnd; +} + +void AArch64CGFunc::SelectRem(Operand &resOpnd, Operand &lhsOpnd, Operand &rhsOpnd, PrimType primType, bool isSigned, + bool is64Bits) { + Operand &opnd0 = LoadIntoRegister(lhsOpnd, primType); + Operand &opnd1 = LoadIntoRegister(rhsOpnd, primType); + + ASSERT(IsPrimitiveInteger(primType), "Wrong type for REM"); + /* + * printf("%d \n", 29 % 7 ); + * -> 1 + * printf("%u %d \n", (unsigned)-7, (unsigned)(-7) % 7 ); + * -> 4294967289 4 + * printf("%d \n", (-7) % 7 ); + * -> 0 + * printf("%d \n", 237 % -7 ); + * 6-> + * printf("implicit i->u conversion %d \n", ((unsigned)237) % -7 ); + * implicit conversion 237 + + * http://stackoverflow.com/questions/35351470/obtaining-remainder-using-single-aarch64-instruction + * input: x0=dividend, x1=divisor + * udiv|sdiv x2, x0, x1 + * msub x3, x2, x1, x0 -- multply-sub : x3 <- x0 - x2*x1 + * result: x2=quotient, x3=remainder + * + * allocate temporary register + */ + RegOperand &temp = CreateRegisterOperandOfType(primType); + Insn *movImmInsn = GetCurBB()->GetLastInsn(); + /* + * mov w1, #2 + * sdiv wTemp, w0, w1 + * msub wRespond, wTemp, w1, w0 + * ========> + * asr wTemp, w0, #31 + * lsr wTemp, wTemp, #31 (#30 for 4, #29 for 8, ...) + * add wRespond, w0, wTemp + * and wRespond, wRespond, #1 (#3 for 4, #7 for 8, ...) + * sub wRespond, wRespond, w2 + * + * if divde by 2 + * ========> + * lsr wTemp, w0, #31 + * add wRespond, w0, wTemp + * and wRespond, wRespond, #1 + * sub wRespond, wRespond, w2 + */ + if ((Globals::GetInstance()->GetOptimLevel() >= CGOptions::kLevel2) && movImmInsn && + ((movImmInsn->GetMachineOpcode() == MOP_xmovri32) || (movImmInsn->GetMachineOpcode() == MOP_xmovri64)) && + movImmInsn->GetOperand(0).Equals(opnd1)) { + auto &imm = static_cast(movImmInsn->GetOperand(kInsnSecondOpnd)); + /* positive or negative do not have effect on the result */ + const int64 dividor = (imm.GetValue() >= 0) ? 
imm.GetValue() : ((-1) * imm.GetValue()); + const int64 Log2OfDividor = IsPowerOf2(dividor); + if ((dividor != 0) && (Log2OfDividor > 0)) { + GetCurBB()->RemoveInsn(*GetCurBB()->GetLastInsn()); + if (is64Bits) { + CHECK_FATAL(Log2OfDividor < k64BitSize, "imm out of bound"); + AArch64ImmOperand &rightShiftValue = CreateImmOperand(k64BitSize - Log2OfDividor, k64BitSize, isSigned); + if (Log2OfDividor != 1) { + /* 63->shift ALL , 32 ->32bit register */ + AArch64ImmOperand &rightShiftAll = CreateImmOperand(63, k64BitSize, isSigned); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xasrrri6, temp, opnd0, rightShiftAll)); + + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xlsrrri6, temp, temp, rightShiftValue)); + } else { + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xlsrrri6, temp, opnd0, rightShiftValue)); + } + + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xaddrrr, resOpnd, opnd0, temp)); + + AArch64ImmOperand &remBits = CreateImmOperand(dividor - 1, k64BitSize, isSigned); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xandrri13, resOpnd, resOpnd, remBits)); + + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xsubrrr, resOpnd, resOpnd, temp)); + return; + } else { + CHECK_FATAL(Log2OfDividor < k32BitSize, "imm out of bound"); + AArch64ImmOperand &rightShiftValue = CreateImmOperand(k32BitSize - Log2OfDividor, k32BitSize, isSigned); + if (Log2OfDividor != 1) { + /* 31->shift ALL , 32 ->32bit register */ + AArch64ImmOperand &rightShiftAll = CreateImmOperand(31, k32BitSize, isSigned); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_wasrrri5, temp, opnd0, rightShiftAll)); + + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_wlsrrri5, temp, temp, rightShiftValue)); + } else { + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_wlsrrri5, temp, opnd0, rightShiftValue)); + } + + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_waddrrr, resOpnd, opnd0, temp)); + + AArch64ImmOperand &remBits = CreateImmOperand(dividor - 1, k32BitSize, isSigned); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_wandrri12, resOpnd, resOpnd, remBits)); + + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_wsubrrr, resOpnd, resOpnd, temp)); + return; + } + } + } + uint32 mopDiv = is64Bits ? (isSigned ? MOP_xsdivrrr : MOP_xudivrrr) : (isSigned ? MOP_wsdivrrr : MOP_wudivrrr); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mopDiv, temp, opnd0, opnd1)); + + uint32 mopSub = is64Bits ? MOP_xmsubrrrr : MOP_wmsubrrrr; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mopSub, resOpnd, temp, opnd1, opnd0)); +} + +Operand *AArch64CGFunc::SelectRem(BinaryNode &node, Operand &opnd0, Operand &opnd1) { + PrimType dtype = node.GetPrimType(); + ASSERT(IsPrimitiveInteger(dtype), "wrong type for rem"); + bool isSigned = IsSignedInteger(dtype); + uint32 dsize = GetPrimTypeBitSize(dtype); + bool is64Bits = (dsize == k64BitSize); + + /* promoted type */ + PrimType primType = ((is64Bits ? (isSigned ? PTY_i64 : PTY_u64) : (isSigned ? PTY_i32 : PTY_u32))); + RegOperand &resOpnd = CreateRegisterOperandOfType(primType); + SelectRem(resOpnd, opnd0, opnd1, primType, isSigned, is64Bits); + return &resOpnd; +} + +Operand *AArch64CGFunc::SelectLand(BinaryNode &node, Operand &lhsOpnd, Operand &rhsOpnd) { + PrimType primType = node.GetPrimType(); + ASSERT(IsPrimitiveInteger(primType), "Land should be integer type"); + bool is64Bits = (GetPrimTypeBitSize(primType) == k64BitSize); + RegOperand &resOpnd = CreateRegisterOperandOfType(is64Bits ? 
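+  /*
+   * Concrete instance of the signed rem-by-power-of-two rewrite above
+   * (illustrative), x % 8 with a 64-bit operand in x1:
+   *   asr x2, x1, #63
+   *   lsr x2, x2, #61     // 7 when negative, 0 otherwise
+   *   add x0, x1, x2
+   *   and x0, x0, #7
+   *   sub x0, x0, x2
+   * Non-constant divisors keep the generic sdiv/udiv + msub sequence.
+   */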
PTY_u64 : PTY_u32); + /* + * OP0 band Op1 + * cmp OP0, 0 # compare X0 with 0, sets Z bit + * ccmp OP1, 0, 4 //==0100b, ne # if(OP0!=0) cmp Op1 and 0, else NZCV <- 0100 makes OP0==0 + * cset RES, ne # if Z==1(i.e., OP0==0||OP1==0) RES<-0, RES<-1 + */ + Operand &opnd0 = LoadIntoRegister(lhsOpnd, primType); + SelectAArch64Cmp(opnd0, CreateImmOperand(0, primType, false), true, GetPrimTypeBitSize(primType)); + Operand &opnd1 = LoadIntoRegister(rhsOpnd, primType); + SelectAArch64CCmp(opnd1, CreateImmOperand(0, primType, false), CreateImmOperand(4, PTY_u8, false), + GetCondOperand(CC_NE), is64Bits); + SelectAArch64CSet(resOpnd, GetCondOperand(CC_NE), is64Bits); + return &resOpnd; +} + +Operand *AArch64CGFunc::SelectLor(BinaryNode &node, Operand &opnd0, Operand &opnd1, bool parentIsBr) { + PrimType primType = node.GetPrimType(); + ASSERT(IsPrimitiveInteger(primType), "Lior should be integer type"); + bool is64Bits = (GetPrimTypeBitSize(primType) == k64BitSize); + RegOperand &resOpnd = CreateRegisterOperandOfType(is64Bits ? PTY_u64 : PTY_u32); + /* + * OP0 band Op1 + * cmp OP0, 0 # compare X0 with 0, sets Z bit + * ccmp OP1, 0, 0 //==0100b, eq # if(OP0==0,eq) cmp Op1 and 0, else NZCV <- 0000 makes OP0!=0 + * cset RES, ne # if Z==1(i.e., OP0==0&&OP1==0) RES<-0, RES<-1 + */ + if (parentIsBr && !is64Bits && opnd0.IsRegister() && (static_cast(&opnd0)->GetValidBitsNum() == 1) && + opnd1.IsRegister() && (static_cast(&opnd1)->GetValidBitsNum() == 1)) { + uint32 mOp = MOP_wiorrrr; + static_cast(resOpnd).SetValidBitsNum(1); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, resOpnd, opnd0, opnd1)); + } else { + SelectBior(resOpnd, opnd0, opnd1, primType); + SelectAArch64Cmp(resOpnd, CreateImmOperand(0, primType, false), true, GetPrimTypeBitSize(primType)); + SelectAArch64CSet(resOpnd, GetCondOperand(CC_NE), is64Bits); + } + return &resOpnd; +} + +void AArch64CGFunc::SelectCmpOp(Operand &resOpnd, Operand &lhsOpnd, Operand &rhsOpnd, + Opcode opcode, PrimType primType) { + uint32 dsize = resOpnd.GetSize(); + bool isFloat = IsPrimitiveFloat(primType); + Operand &opnd0 = LoadIntoRegister(lhsOpnd, primType); + + /* + * most of FP constants are passed as AArch64MemOperand + * except 0.0 which is passed as kOpdFPZeroImmediate + */ + Operand::OperandType opnd1Type = rhsOpnd.GetKind(); + Operand *opnd1 = &rhsOpnd; + if ((opnd1Type != Operand::kOpdImmediate) && (opnd1Type != Operand::kOpdFPZeroImmediate) && + (opnd1Type != Operand::kOpdOffset)) { + opnd1 = &LoadIntoRegister(rhsOpnd, primType); + } + + bool unsignedIntegerComparison = !isFloat && !IsSignedInteger(primType); + /* + * OP_cmp, OP_cmpl, OP_cmpg + * OP0, OP1 ; fcmp for OP_cmpl/OP_cmpg, cmp/fcmpe for OP_cmp + * CSINV RES, WZR, WZR, GE + * CSINC RES, RES, WZR, LE + * if OP_cmpl, CSINV RES, RES, WZR, VC (no overflow) + * if OP_cmpg, CSINC RES, RES, WZR, VC (no overflow) + */ + AArch64RegOperand &xzr = AArch64RegOperand::GetZeroRegister(dsize); + if ((opcode == OP_cmpl) || (opcode == OP_cmpg)) { + ASSERT(isFloat, "incorrect operand types"); + SelectTargetFPCmpQuiet(opnd0, *opnd1, GetPrimTypeBitSize(primType)); + SelectAArch64CSINV(resOpnd, xzr, xzr, GetCondOperand(CC_GE), (dsize == k64BitSize)); + SelectAArch64CSINC(resOpnd, resOpnd, xzr, GetCondOperand(CC_LE), (dsize == k64BitSize)); + if (opcode == OP_cmpl) { + SelectAArch64CSINV(resOpnd, resOpnd, xzr, GetCondOperand(CC_VC), (dsize == k64BitSize)); + } else { + SelectAArch64CSINC(resOpnd, resOpnd, xzr, GetCondOperand(CC_VC), (dsize == k64BitSize)); + } + return; + } + + if (opcode == OP_cmp) { + 
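+    /*
+     * For the three-way compares handled above, a float OP_cmpg expands to
+     * (illustrative registers):
+     *   fcmp  s0, s1
+     *   csinv w0, wzr, wzr, ge   // -1 when less
+     *   csinc w0, w0, wzr, le    // +1 when greater
+     *   csinc w0, w0, wzr, vc    // unordered (NaN) also gives +1 for cmpg
+     * OP_cmpl differs only in the last step, using csinv so NaN gives -1.
+     */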
SelectAArch64Cmp(opnd0, *opnd1, !isFloat, GetPrimTypeBitSize(primType)); + if (unsignedIntegerComparison) { + SelectAArch64CSINV(resOpnd, xzr, xzr, GetCondOperand(CC_HS), (dsize == k64BitSize)); + SelectAArch64CSINC(resOpnd, resOpnd, xzr, GetCondOperand(CC_LS), (dsize == k64BitSize)); + } else { + SelectAArch64CSINV(resOpnd, xzr, xzr, GetCondOperand(CC_GE), (dsize == k64BitSize)); + SelectAArch64CSINC(resOpnd, resOpnd, xzr, GetCondOperand(CC_LE), (dsize == k64BitSize)); + } + return; + } + + static_cast(&resOpnd)->SetValidBitsNum(1); + if ((opcode == OP_lt) && opnd0.IsRegister() && opnd1->IsImmediate() && + (static_cast(opnd1)->GetValue() == 0)) { + bool is64Bits = (opnd0.GetSize() == k64BitSize); + if (!unsignedIntegerComparison) { + int32 bitLen = is64Bits ? kBitLenOfShift64Bits : kBitLenOfShift32Bits; + ImmOperand &shiftNum = CreateImmOperand(is64Bits ? kHighestBitOf64Bits : kHighestBitOf32Bits, bitLen, false); + MOperator mOpCode = is64Bits ? MOP_xlsrrri6 : MOP_wlsrrri5; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOpCode, resOpnd, opnd0, shiftNum)); + return; + } + ImmOperand &constNum = CreateImmOperand(0, is64Bits ? k64BitSize : k32BitSize, false); + GetCurBB()->AppendInsn( + GetCG()->BuildInstruction(is64Bits ? MOP_xmovri64 : MOP_xmovri32, resOpnd, constNum)); + return; + } + SelectAArch64Cmp(opnd0, *opnd1, !isFloat, GetPrimTypeBitSize(primType)); + + AArch64CC_t cc = CC_EQ; + switch (opcode) { + case OP_eq: + cc = CC_EQ; + break; + case OP_ne: + cc = CC_NE; + break; + case OP_le: + cc = unsignedIntegerComparison ? CC_LS : CC_LE; + break; + case OP_ge: + cc = unsignedIntegerComparison ? CC_HS : CC_GE; + break; + case OP_gt: + cc = unsignedIntegerComparison ? CC_HI : CC_GT; + break; + case OP_lt: + cc = unsignedIntegerComparison ? CC_LO : CC_LT; + break; + default: + CHECK_FATAL(false, "illegal logical operator"); + } + SelectAArch64CSet(resOpnd, GetCondOperand(cc), (dsize == k64BitSize)); +} + +Operand *AArch64CGFunc::SelectCmpOp(CompareNode &node, Operand &opnd0, Operand &opnd1) { + RegOperand &resOpnd = CreateRegisterOperandOfType(node.GetPrimType()); + SelectCmpOp(resOpnd, opnd0, opnd1, node.GetOpCode(), node.GetOpndType()); + return &resOpnd; +} + +void AArch64CGFunc::SelectTargetFPCmpQuiet(Operand &o0, Operand &o1, uint32 dsize) { + MOperator mOpCode = 0; + if (o1.GetKind() == Operand::kOpdFPZeroImmediate) { + mOpCode = (dsize == k64BitSize) ? MOP_dcmpqri : (dsize == k32BitSize) ? MOP_scmpqri : MOP_hcmpqri; + } else if (o1.GetKind() == Operand::kOpdRegister) { + mOpCode = (dsize == k64BitSize) ? MOP_dcmpqrr : (dsize == k32BitSize) ? MOP_scmpqrr : MOP_hcmpqrr; + } else { + CHECK_FATAL(false, "unsupported operand type"); + } + Operand &rflag = GetOrCreateRflag(); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOpCode, rflag, o0, o1)); +} + +void AArch64CGFunc::SelectAArch64Cmp(Operand &o0, Operand &o1, bool isIntType, uint32 dsize) { + MOperator mOpCode = 0; + Operand *newO1 = &o1; + if (isIntType) { + if ((o1.GetKind() == Operand::kOpdImmediate) || (o1.GetKind() == Operand::kOpdOffset)) { + ImmOperand *immOpnd = static_cast(&o1); + /* + * imm : 0 ~ 4095, shift: none, LSL #0, or LSL #12 + * aarch64 assembly takes up to 24-bits, if the lower 12 bits is all 0 + */ + if (immOpnd->IsInBitSize(kMaxImmVal12Bits, 0) || immOpnd->IsInBitSize(kMaxImmVal12Bits, kMaxImmVal12Bits)) { + mOpCode = (dsize == k64BitSize) ? MOP_xcmpri : MOP_wcmpri; + } else { + /* load into register */ + PrimType ptype = (dsize == k64BitSize) ? 
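+  /*
+   * Note on the (x < 0) fast path in SelectCmpOp above: for a signed operand the
+   * result is just the sign bit, e.g. (illustrative)
+   *   lsr x0, x1, #63     // i64: 1 if x1 < 0, else 0
+   * while for an unsigned operand the comparison is constant false and a plain
+   * mov of #0 is emitted.
+   */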
PTY_i64 : PTY_i32; + newO1 = &SelectCopy(o1, ptype, ptype); + mOpCode = (dsize == k64BitSize) ? MOP_xcmprr : MOP_wcmprr; + } + } else if (o1.GetKind() == Operand::kOpdRegister) { + mOpCode = (dsize == k64BitSize) ? MOP_xcmprr : MOP_wcmprr; + } else { + CHECK_FATAL(false, "unsupported operand type"); + } + } else { /* float */ + if (o1.GetKind() == Operand::kOpdFPZeroImmediate) { + mOpCode = (dsize == k64BitSize) ? MOP_dcmperi : ((dsize == k32BitSize) ? MOP_scmperi : MOP_hcmperi); + } else if (o1.GetKind() == Operand::kOpdRegister) { + mOpCode = (dsize == k64BitSize) ? MOP_dcmperr : ((dsize == k32BitSize) ? MOP_scmperr : MOP_hcmperr); + } else { + CHECK_FATAL(false, "unsupported operand type"); + } + } + ASSERT(mOpCode != 0, "mOpCode undefined"); + Operand &rflag = GetOrCreateRflag(); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOpCode, rflag, o0, *newO1)); +} + +void AArch64CGFunc::SelectAArch64CCmp(Operand &o, Operand &i, Operand &nzcv, CondOperand &cond, bool is64Bits) { + uint32 mOpCode = is64Bits ? MOP_xccmpriic : MOP_wccmpriic; + Operand &rflag = GetOrCreateRflag(); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOpCode, rflag, o, i, nzcv, cond)); +} + +void AArch64CGFunc::SelectAArch64CSet(Operand &r, CondOperand &cond, bool is64Bits) { + MOperator mOpCode = is64Bits ? MOP_xcsetrc : MOP_wcsetrc; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOpCode, r, cond)); +} + +void AArch64CGFunc::SelectAArch64CSINV(Operand &res, Operand &o0, Operand &o1, CondOperand &cond, bool is64Bits) { + MOperator mOpCode = is64Bits ? MOP_xcsinvrrrc : MOP_wcsinvrrrc; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOpCode, res, o0, o1, cond)); +} + +void AArch64CGFunc::SelectAArch64CSINC(Operand &res, Operand &o0, Operand &o1, CondOperand &cond, bool is64Bits) { + MOperator mOpCode = is64Bits ? MOP_xcsincrrrc : MOP_wcsincrrrc; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOpCode, res, o0, o1, cond)); +} + +Operand *AArch64CGFunc::SelectBand(BinaryNode &node, Operand &opnd0, Operand &opnd1) { + return SelectRelationOperator(kAND, node, opnd0, opnd1); +} + +void AArch64CGFunc::SelectBand(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) { + SelectRelationOperator(kAND, resOpnd, opnd0, opnd1, primType); +} + +Operand *AArch64CGFunc::SelectRelationOperator(RelationOperator operatorCode, const BinaryNode &node, Operand &opnd0, + Operand &opnd1) { + PrimType dtype = node.GetPrimType(); + bool isSigned = IsSignedInteger(dtype); + uint32 dsize = GetPrimTypeBitSize(dtype); + bool is64Bits = (dsize == k64BitSize); + PrimType primType = is64Bits ? (isSigned ? PTY_i64 : PTY_u64) : (isSigned ? PTY_i32 : PTY_u32); /* promoted type */ + RegOperand &resOpnd = CreateRegisterOperandOfType(primType); + SelectRelationOperator(operatorCode, resOpnd, opnd0, opnd1, primType); + return &resOpnd; +} + +MOperator AArch64CGFunc::SelectRelationMop(RelationOperator operatorCode, + RelationOperatorOpndPattern opndPattern, bool is64Bits, + bool isBitmaskImmediate, bool isBitNumLessThan16) const { + MOperator mOp = MOP_undef; + if (opndPattern == kRegReg) { + switch (operatorCode) { + case kAND: + mOp = is64Bits ? MOP_xandrrr : MOP_wandrrr; + break; + case kIOR: + mOp = is64Bits ? MOP_xiorrrr : MOP_wiorrrr; + break; + case kEOR: + mOp = is64Bits ? MOP_xeorrrr : MOP_weorrrr; + break; + default: + break; + } + return mOp; + } + /* opndPattern == KRegImm */ + if (isBitmaskImmediate) { + switch (operatorCode) { + case kAND: + mOp = is64Bits ? 
MOP_xandrri13 : MOP_wandrri12; + break; + case kIOR: + mOp = is64Bits ? MOP_xiorrri13 : MOP_wiorrri12; + break; + case kEOR: + mOp = is64Bits ? MOP_xeorrri13 : MOP_weorrri12; + break; + default: + break; + } + return mOp; + } + /* normal imm value */ + if (isBitNumLessThan16) { + switch (operatorCode) { + case kAND: + mOp = is64Bits ? MOP_xandrrrs : MOP_wandrrrs; + break; + case kIOR: + mOp = is64Bits ? MOP_xiorrrrs : MOP_wiorrrrs; + break; + case kEOR: + mOp = is64Bits ? MOP_xeorrrrs : MOP_weorrrrs; + break; + default: + break; + } + return mOp; + } + return mOp; +} + +void AArch64CGFunc::SelectRelationOperator(RelationOperator operatorCode, Operand &resOpnd, Operand &opnd0, + Operand &opnd1, PrimType primType) { + Operand::OperandType opnd0Type = opnd0.GetKind(); + Operand::OperandType opnd1Type = opnd1.GetKind(); + uint32 dsize = GetPrimTypeBitSize(primType); + bool is64Bits = (dsize == k64BitSize); + /* op #imm. #imm */ + if ((opnd0Type != Operand::kOpdRegister) && (opnd1Type != Operand::kOpdRegister)) { + SelectRelationOperator(operatorCode, resOpnd, SelectCopy(opnd0, primType, primType), opnd1, primType); + return; + } + /* op #imm, reg -> op reg, #imm */ + if ((opnd0Type != Operand::kOpdRegister) && (opnd1Type == Operand::kOpdRegister)) { + SelectRelationOperator(operatorCode, resOpnd, opnd1, opnd0, primType); + return; + } + /* op reg, reg */ + if ((opnd0Type == Operand::kOpdRegister) && (opnd1Type == Operand::kOpdRegister)) { + ASSERT(IsPrimitiveInteger(primType), "NYI band"); + MOperator mOp = SelectRelationMop(operatorCode, kRegReg, is64Bits, false, false); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, resOpnd, opnd0, opnd1)); + return; + } + /* op reg, #imm */ + if ((opnd0Type == Operand::kOpdRegister) && (opnd1Type != Operand::kOpdRegister)) { + if (!((opnd1Type == Operand::kOpdImmediate) || (opnd1Type == Operand::kOpdOffset))) { + SelectRelationOperator(operatorCode, resOpnd, opnd0, SelectCopy(opnd1, primType, primType), primType); + return; + } + + AArch64ImmOperand *immOpnd = static_cast(&opnd1); + if (immOpnd->IsZero()) { + if (operatorCode == kAND) { + uint32 mopMv = is64Bits ? MOP_xmovrr : MOP_wmovrr; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mopMv, resOpnd, + AArch64RegOperand::GetZeroRegister(dsize))); + } else if ((operatorCode == kIOR) || (operatorCode == kEOR)) { + SelectCopy(resOpnd, primType, opnd0, primType); + } + } else if ((is64Bits && immOpnd->IsAllOnes()) || (!is64Bits && immOpnd->IsAllOnes32bit())) { + if (operatorCode == kAND) { + SelectCopy(resOpnd, primType, opnd0, primType); + } else if (operatorCode == kIOR) { + uint32 mopMovn = is64Bits ? 
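+      /*
+       * Illustrative immediate cases for the and/or/xor selection here:
+       *   x & 0      ->  mov  xd, xzr
+       *   x | ~0     ->  movn xd, #0            // all ones
+       *   x & 0xff0  ->  and  xd, xn, #0xff0    // encodable as a bitmask immediate
+       * Immediates that are not bitmask-encodable are first moved into a scratch
+       * register, using the shifted-register form when the useful bits fit in 16.
+       */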
MOP_xmovnri16 : MOP_wmovnri16; + ImmOperand &src16 = CreateImmOperand(0, k16BitSize, false); + LogicalShiftLeftOperand *lslOpnd = GetLogicalShiftLeftOperand(0, is64Bits); + GetCurBB()->AppendInsn( + GetCG()->BuildInstruction(mopMovn, resOpnd, src16, *lslOpnd)); + } else if (operatorCode == kEOR) { + SelectMvn(resOpnd, opnd0, primType); + } + } else if (immOpnd->IsBitmaskImmediate()) { + MOperator mOp = SelectRelationMop(operatorCode, kRegImm, is64Bits, true, false); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, resOpnd, opnd0, opnd1)); + } else { + int64 immVal = immOpnd->GetValue(); + int32 tail0BitNum = GetTail0BitNum(immVal); + int32 head0BitNum = GetHead0BitNum(immVal); + const int32 bitNum = k64BitSize - head0BitNum - tail0BitNum; + RegOperand ®Opnd = CreateRegisterOperandOfType(primType); + + if (bitNum <= k16ValidBit) { + int64 newImm = (static_cast(immVal) >> static_cast(tail0BitNum)) & 0xFFFF; + AArch64ImmOperand &immOpnd1 = CreateImmOperand(newImm, k16BitSize, false); + SelectCopyImm(regOpnd, immOpnd1, primType); + MOperator mOp = SelectRelationMop(operatorCode, kRegImm, is64Bits, false, true); + int32 bitLen = is64Bits ? kBitLenOfShift64Bits : kBitLenOfShift32Bits; + BitShiftOperand &shiftOpnd = CreateBitShiftOperand(BitShiftOperand::kLSL, tail0BitNum, bitLen); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, resOpnd, opnd0, regOpnd, shiftOpnd)); + } else { + SelectCopyImm(regOpnd, *immOpnd, primType); + MOperator mOp = SelectRelationMop(operatorCode, kRegReg, is64Bits, false, false); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, resOpnd, opnd0, regOpnd)); + } + } + } +} + +Operand *AArch64CGFunc::SelectBior(BinaryNode &node, Operand &opnd0, Operand &opnd1) { + return SelectRelationOperator(kIOR, node, opnd0, opnd1); +} + +void AArch64CGFunc::SelectBior(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) { + SelectRelationOperator(kIOR, resOpnd, opnd0, opnd1, primType); +} + +Operand *AArch64CGFunc::SelectMinOrMax(bool isMin, const BinaryNode &node, Operand &opnd0, Operand &opnd1) { + PrimType dtype = node.GetPrimType(); + bool isSigned = IsSignedInteger(dtype); + uint32 dsize = GetPrimTypeBitSize(dtype); + bool is64Bits = (dsize == k64BitSize); + bool isFloat = IsPrimitiveFloat(dtype); + /* promoted type */ + PrimType primType = isFloat ? dtype : (is64Bits ? (isSigned ? PTY_i64 : PTY_u64) : (isSigned ? PTY_i32 : PTY_u32)); + RegOperand &resOpnd = CreateRegisterOperandOfType(primType); + SelectMinOrMax(isMin, resOpnd, opnd0, opnd1, primType); + return &resOpnd; +} + +void AArch64CGFunc::SelectMinOrMax(bool isMin, Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) { + uint32 dsize = GetPrimTypeBitSize(primType); + bool is64Bits = (dsize == k64BitSize); + if (IsPrimitiveInteger(primType)) { + RegOperand ®Opnd0 = LoadIntoRegister(opnd0, primType); + Operand ®Opnd1 = LoadIntoRegister(opnd1, primType); + SelectAArch64Cmp(regOpnd0, regOpnd1, true, dsize); + Operand &newResOpnd = LoadIntoRegister(resOpnd, primType); + if (isMin) { + CondOperand &cc = IsSignedInteger(primType) ? GetCondOperand(CC_LT) : GetCondOperand(CC_LO); + SelectAArch64Select(newResOpnd, regOpnd0, regOpnd1, cc, true, dsize); + } else { + CondOperand &cc = IsSignedInteger(primType) ? 
GetCondOperand(CC_GT) : GetCondOperand(CC_HI); + SelectAArch64Select(newResOpnd, regOpnd0, regOpnd1, cc, true, dsize); + } + } else if (IsPrimitiveFloat(primType)) { + RegOperand ®Opnd0 = LoadIntoRegister(opnd0, primType); + RegOperand ®Opnd1 = LoadIntoRegister(opnd1, primType); + SelectFMinFMax(resOpnd, regOpnd0, regOpnd1, is64Bits, isMin); + } else { + CHECK_FATAL(false, "NIY type max or min"); + } +} + +Operand *AArch64CGFunc::SelectMin(BinaryNode &node, Operand &opnd0, Operand &opnd1) { + return SelectMinOrMax(true, node, opnd0, opnd1); +} + +void AArch64CGFunc::SelectMin(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) { + SelectMinOrMax(true, resOpnd, opnd0, opnd1, primType); +} + +Operand *AArch64CGFunc::SelectMax(BinaryNode &node, Operand &opnd0, Operand &opnd1) { + return SelectMinOrMax(false, node, opnd0, opnd1); +} + +void AArch64CGFunc::SelectMax(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) { + SelectMinOrMax(false, resOpnd, opnd0, opnd1, primType); +} + +void AArch64CGFunc::SelectFMinFMax(Operand &resOpnd, Operand &opnd0, Operand &opnd1, bool is64Bits, bool isMin) { + uint32 mOpCode = isMin ? (is64Bits ? MOP_xfminrrr : MOP_wfminrrr) : (is64Bits ? MOP_xfmaxrrr : MOP_wfmaxrrr); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOpCode, resOpnd, opnd0, opnd1)); +} + +Operand *AArch64CGFunc::SelectBxor(BinaryNode &node, Operand &opnd0, Operand &opnd1) { + return SelectRelationOperator(kEOR, node, opnd0, opnd1); +} + +void AArch64CGFunc::SelectBxor(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) { + SelectRelationOperator(kEOR, resOpnd, opnd0, opnd1, primType); +} + +Operand *AArch64CGFunc::SelectShift(BinaryNode &node, Operand &opnd0, Operand &opnd1) { + PrimType dtype = node.GetPrimType(); + bool isSigned = IsSignedInteger(dtype); + uint32 dsize = GetPrimTypeBitSize(dtype); + bool is64Bits = (dsize == k64BitSize); + bool isFloat = IsPrimitiveFloat(dtype); + /* promoted type */ + PrimType primType = isFloat ? dtype : (is64Bits ? (isSigned ? PTY_i64 : PTY_u64) : (isSigned ? PTY_i32 : PTY_u32)); + RegOperand &resOpnd = CreateRegisterOperandOfType(primType); + Opcode opcode = node.GetOpCode(); + ShiftDirection direct = (opcode == OP_lshr) ? kShiftLright : ((opcode == OP_ashr) ? kShiftAright : kShiftLeft); + SelectShift(resOpnd, opnd0, opnd1, direct, primType); + return &resOpnd; +} + +void AArch64CGFunc::SelectBxorShift(Operand &resOpnd, Operand *opnd0, Operand *opnd1, Operand &opnd2, + PrimType primType) { + opnd0 = &LoadIntoRegister(*opnd0, primType); + opnd1 = &LoadIntoRegister(*opnd1, primType); + uint32 dsize = GetPrimTypeBitSize(primType); + bool is64Bits = (dsize == k64BitSize); + MOperator mopBxor = is64Bits ? MOP_xeorrrrs : MOP_weorrrrs; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mopBxor, resOpnd, *opnd0, *opnd1, opnd2)); +} + +void AArch64CGFunc::SelectShift(Operand &resOpnd, Operand &opnd0, Operand &opnd1, ShiftDirection direct, + PrimType primType) { + Operand::OperandType opnd1Type = opnd1.GetKind(); + uint32 dsize = GetPrimTypeBitSize(primType); + bool is64Bits = (dsize == k64BitSize); + Operand *firstOpnd = &LoadIntoRegister(opnd0, primType); + + MOperator mopShift; + if ((opnd1Type == Operand::kOpdImmediate) || (opnd1Type == Operand::kOpdOffset)) { + AArch64ImmOperand *immOpnd1 = static_cast(&opnd1); + const int64 kVal = immOpnd1->GetValue(); + const uint32 kShiftamt = is64Bits ? 
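+      /*
+       * Illustrative expansion of the integer min/max selection above, e.g. the
+       * minimum of two signed i32 values (assuming a conditional select is emitted):
+       *   cmp  w1, w2
+       *   csel w0, w1, w2, lt    // lo for unsigned; gt/hi for max
+       * Floating-point min/max map directly onto the fmin/fmax instructions.
+       */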
kHighestBitOf64Bits : kHighestBitOf32Bits; + if (kVal == 0) { + SelectCopy(resOpnd, primType, *firstOpnd, primType); + return; + } + /* e.g. a >> -1 */ + if ((kVal < 0) || (kVal > kShiftamt)) { + SelectShift(resOpnd, *firstOpnd, SelectCopy(opnd1, primType, primType), direct, primType); + return; + } + switch (direct) { + case kShiftLeft: + if (kVal == 1) { + SelectAdd(resOpnd, *firstOpnd, *firstOpnd, primType); + return; + } + mopShift = is64Bits ? MOP_xlslrri6 : MOP_wlslrri5; + break; + case kShiftAright: + mopShift = is64Bits ? MOP_xasrrri6 : MOP_wasrrri5; + break; + case kShiftLright: + mopShift = is64Bits ? MOP_xlsrrri6 : MOP_wlsrrri5; + break; + } + } else if (opnd1Type != Operand::kOpdRegister) { + SelectShift(resOpnd, *firstOpnd, SelectCopy(opnd1, primType, primType), direct, primType); + return; + } else { + switch (direct) { + case kShiftLeft: + mopShift = is64Bits ? MOP_xlslrrr : MOP_wlslrrr; + break; + case kShiftAright: + mopShift = is64Bits ? MOP_xasrrrr : MOP_wasrrrr; + break; + case kShiftLright: + mopShift = is64Bits ? MOP_xlsrrrr : MOP_wlsrrrr; + break; + } + } + + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mopShift, resOpnd, *firstOpnd, opnd1)); +} + +Operand *AArch64CGFunc::SelectAbs(UnaryNode &node, Operand &opnd0) { + PrimType dtyp = node.GetPrimType(); + if (IsPrimitiveFloat(dtyp)) { + CHECK_FATAL(GetPrimTypeBitSize(dtyp) >= k32BitSize, "We don't support hanf-word FP operands yet"); + bool is64Bits = (GetPrimTypeBitSize(dtyp) == k64BitSize); + Operand &newOpnd0 = LoadIntoRegister(opnd0, dtyp); + RegOperand &resOpnd = CreateRegisterOperandOfType(dtyp); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(is64Bits ? MOP_dabsrr : MOP_sabsrr, + resOpnd, newOpnd0)); + return &resOpnd; + } else { + bool is64Bits = (GetPrimTypeBitSize(dtyp) == k64BitSize); + /* promoted type */ + PrimType primType = is64Bits ? (PTY_i64) : (PTY_i32); + RegOperand &resOpnd = CreateRegisterOperandOfType(primType); + Operand &newOpnd0 = LoadIntoRegister(opnd0, primType); + SelectAArch64Cmp(newOpnd0, CreateImmOperand(0, is64Bits ? PTY_u64 : PTY_u32, false), + true, GetPrimTypeBitSize(dtyp)); + uint32 mopCsneg = is64Bits ? MOP_xcsnegrrrc : MOP_wcsnegrrrc; + /* ABS requires the operand be interpreted as a signed integer */ + CondOperand &condOpnd = GetCondOperand(CC_GE); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mopCsneg, resOpnd, newOpnd0, newOpnd0, condOpnd)); + return &resOpnd; + } +} + +Operand *AArch64CGFunc::SelectBnot(UnaryNode &node, Operand &opnd0) { + PrimType dtype = node.GetPrimType(); + ASSERT(IsPrimitiveInteger(dtype), "bnot expect integer or NYI"); + bool is64Bits = (GetPrimTypeBitSize(dtype) == k64BitSize); + bool isSigned = IsSignedInteger(dtype); + /* promoted type */ + PrimType primType = is64Bits ? (isSigned ? PTY_i64 : PTY_u64) : (isSigned ? PTY_i32 : PTY_u32); + RegOperand &resOpnd = CreateRegisterOperandOfType(primType); + + Operand &newOpnd0 = LoadIntoRegister(opnd0, primType); + + uint32 mopBnot = is64Bits ? MOP_xnotrr : MOP_wnotrr; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mopBnot, resOpnd, newOpnd0)); + + return &resOpnd; +} + +Operand *AArch64CGFunc::SelectExtractbits(ExtractbitsNode &node, Operand &srcOpnd) { + PrimType dtype = node.GetPrimType(); + RegOperand &resOpnd = CreateRegisterOperandOfType(dtype); + bool isSigned = IsSignedInteger(dtype); + uint8 bitOffset = node.GetBitsOffset(); + uint8 bitSize = node.GetBitsSize(); + bool is64Bits = (GetPrimTypeBitSize(dtype) == k64BitSize); + uint32 immWidth = is64Bits ? 
kMaxImmVal13Bits : kMaxImmVal12Bits; + Operand &opnd0 = LoadIntoRegister(srcOpnd, dtype); + if ((bitOffset == 0) && !isSigned && (bitSize < immWidth)) { + SelectBand(resOpnd, opnd0, CreateImmOperand((static_cast(1) << bitSize) - 1, immWidth, false), dtype); + return &resOpnd; + } + uint32 mopBfx = + is64Bits ? (isSigned ? MOP_xsbfxrri6i6 : MOP_xubfxrri6i6) : (isSigned ? MOP_wsbfxrri5i5 : MOP_wubfxrri5i5); + AArch64ImmOperand &immOpnd1 = CreateImmOperand(bitOffset, k8BitSize, false); + AArch64ImmOperand &immOpnd2 = CreateImmOperand(bitSize, k8BitSize, false); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mopBfx, resOpnd, opnd0, immOpnd1, immOpnd2)); + return &resOpnd; +} + +/* + * operand fits in MOVK if + * is64Bits && boffst == 0, 16, 32, 48 && bSize == 16, so boffset / 16 == 0, 1, 2, 3; (boffset / 16 ) & (~3) == 0 + * or is32Bits && boffset == 0, 16 && bSize == 16, so boffset / 16 == 0, 1; (boffset / 16) & (~1) == 0 + */ +inline bool IsMoveWideKeepable(uint32 bitOffset, uint32 bitSize, bool is64Bits) { + ASSERT(is64Bits || (bitOffset < k32BitSize), ""); + return (bitSize == k16BitSize && ((bitOffset >> k16BitShift) & ~static_cast(is64Bits ? 0x3 : 0x1)) == 0); +} + +/* we use the fact that A ^ B ^ A == B, A ^ 0 = A */ +void AArch64CGFunc::SelectDepositBits(Operand &resOpnd, Operand &opnd0, Operand &opnd1, uint32 bitOffset, + uint32 bitSize, PrimType regType) { + RegOperand &t1opnd = CreateRegisterOperandOfType(regType); + bool is64Bits = GetPrimTypeBitSize(regType) == k64BitSize; + /* + * if operand 1 is immediate and fits in MOVK, use it + * MOVK Wd, #imm{, LSL #shift} ; 32-bit general registers + * MOVK Xd, #imm{, LSL #shift} ; 64-bit general registers + */ + if (opnd1.IsIntImmediate() && IsMoveWideKeepable(bitOffset, bitSize, is64Bits)) { + SelectCopy(resOpnd, regType, opnd0, regType); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction((is64Bits ? MOP_xmovkri16 : MOP_wmovkri16), + resOpnd, opnd1, + *GetLogicalShiftLeftOperand(bitOffset, is64Bits))); + } else { + /* + * Merge-form of Itanium deposit + * 1. (opnd0>>bitsOffset) ^ opnd1 + */ + int32 bitLen = is64Bits ? kBitLenOfShift64Bits : kBitLenOfShift32Bits; + Operand &shiftOpnd = CreateBitShiftOperand(BitShiftOperand::kLSR, bitOffset, bitLen); + /* bit-shift the first operand to the right by offset and XOR with the second operand */ + SelectBxorShift(t1opnd, &opnd1, &opnd0, shiftOpnd, regType); + /* + * bit-shift the result to the left by offset, retain size bits from offset, clear the rest. + * ubfiz t1opnd, bitsOffset, size + */ + uint32 mopUbfiz = is64Bits ? 
MOP_xubfizrri6i6 : MOP_wubfizrri5i5; + /* XOR the result with the first operand */ + AArch64ImmOperand &immOpnd1 = CreateImmOperand(bitOffset, k8BitSize, false); + AArch64ImmOperand &immOpnd2 = CreateImmOperand(bitSize, k8BitSize, false); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mopUbfiz, t1opnd, t1opnd, immOpnd1, immOpnd2)); + /* opnd0 ^ t1opnd */ + SelectBxor(resOpnd, opnd0, t1opnd, regType); + } +} + +Operand *AArch64CGFunc::SelectDepositBits(DepositbitsNode &node, Operand &opnd0, Operand &opnd1) { + PrimType dtype = node.GetPrimType(); + RegOperand &resOpnd = CreateRegisterOperandOfType(dtype); + SelectDepositBits(resOpnd, opnd0, opnd1, node.GetBitsOffset(), node.GetBitsSize(), dtype); + return &resOpnd; +} + +Operand *AArch64CGFunc::SelectLnot(UnaryNode &node, Operand &srcOpnd) { + PrimType dtype = node.GetPrimType(); + RegOperand &resOpnd = CreateRegisterOperandOfType(dtype); + bool is64Bits = (GetPrimTypeBitSize(dtype) == k64BitSize); + Operand &opnd0 = LoadIntoRegister(srcOpnd, dtype); + SelectAArch64Cmp(opnd0, CreateImmOperand(0, is64Bits ? PTY_u64 : PTY_u32, false), true, GetPrimTypeBitSize(dtype)); + SelectAArch64CSet(resOpnd, GetCondOperand(CC_EQ), is64Bits); + return &resOpnd; +} + +Operand *AArch64CGFunc::SelectNeg(UnaryNode &node, Operand &opnd0) { + PrimType dtype = node.GetPrimType(); + bool is64Bits = (GetPrimTypeBitSize(dtype) == k64BitSize); + PrimType primType; + if (IsPrimitiveFloat(dtype)) { + primType = dtype; + } else { + primType = is64Bits ? (PTY_i64) : (PTY_i32); /* promoted type */ + } + RegOperand &resOpnd = CreateRegisterOperandOfType(primType); + SelectNeg(resOpnd, opnd0, primType); + return &resOpnd; +} + +void AArch64CGFunc::SelectNeg(Operand &dest, Operand &srcOpnd, PrimType primType) { + Operand &opnd0 = LoadIntoRegister(srcOpnd, primType); + bool is64Bits = (GetPrimTypeBitSize(primType) == k64BitSize); + MOperator mOp; + if (IsPrimitiveFloat(primType)) { + mOp = is64Bits ? MOP_xfnegrr : MOP_wfnegrr; + } else { + mOp = is64Bits ? MOP_xinegrr : MOP_winegrr; + } + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, dest, opnd0)); +} + +void AArch64CGFunc::SelectMvn(Operand &dest, Operand &src, PrimType primType) { + Operand &opnd0 = LoadIntoRegister(src, primType); + bool is64Bits = (GetPrimTypeBitSize(primType) == k64BitSize); + MOperator mOp; + ASSERT(!IsPrimitiveFloat(primType), "Instruction 'mvn' do not have float version."); + mOp = is64Bits ? 
MOP_xnotrr : MOP_wnotrr; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, dest, opnd0)); +} + +Operand *AArch64CGFunc::SelectRecip(UnaryNode &node, Operand &src) { + /* + * fconsts s15, #112 + * fdivs s0, s15, s0 + */ + PrimType dtype = node.GetPrimType(); + if (!IsPrimitiveFloat(dtype)) { + ASSERT(false, "should be float type"); + return nullptr; + } + Operand &opnd0 = LoadIntoRegister(src, dtype); + RegOperand &resOpnd = CreateRegisterOperandOfType(dtype); + Operand *one = nullptr; + if (GetPrimTypeBitSize(dtype) == k64BitSize) { + MIRDoubleConst *c = memPool->New(1.0, *GlobalTables::GetTypeTable().GetTypeTable().at(PTY_f64)); + one = SelectDoubleConst(*c); + } else if (GetPrimTypeBitSize(dtype) == k32BitSize) { + MIRFloatConst *c = memPool->New(1.0f, *GlobalTables::GetTypeTable().GetTypeTable().at(PTY_f32)); + one = SelectFloatConst(*c); + } else { + CHECK_FATAL(false, "we don't support half-precision fp operations yet"); + } + SelectDiv(resOpnd, *one, opnd0, dtype); + return &resOpnd; +} + +Operand *AArch64CGFunc::SelectSqrt(UnaryNode &node, Operand &src) { + /* + * gcc generates code like below for better accurate + * fsqrts s15, s0 + * fcmps s15, s15 + * fmstat + * beq .L4 + * push {r3, lr} + * bl sqrtf + * pop {r3, pc} + * .L4: + * fcpys s0, s15 + * bx lr + */ + PrimType dtype = node.GetPrimType(); + if (!IsPrimitiveFloat(dtype)) { + ASSERT(false, "should be float type"); + return nullptr; + } + bool is64Bits = (GetPrimTypeBitSize(dtype) == k64BitSize); + Operand &opnd0 = LoadIntoRegister(src, dtype); + RegOperand &resOpnd = CreateRegisterOperandOfType(dtype); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(is64Bits ? MOP_vsqrtd : MOP_vsqrts, resOpnd, opnd0)); + return &resOpnd; +} + +void AArch64CGFunc::SelectCvtFloat2Int(Operand &resOpnd, Operand &srcOpnd, PrimType itype, PrimType ftype) { + bool is64BitsFloat = (ftype == PTY_f64); + MOperator mOp = 0; + + ASSERT(((ftype == PTY_f64) || (ftype == PTY_f32)), "wrong from type"); + Operand &opnd0 = LoadIntoRegister(srcOpnd, ftype); + switch (itype) { + case PTY_i32: + mOp = !is64BitsFloat ? MOP_vcvtrf : MOP_vcvtrd; + break; + case PTY_u32: + mOp = !is64BitsFloat ? MOP_vcvturf : MOP_vcvturd; + break; + case PTY_i64: + mOp = !is64BitsFloat ? MOP_xvcvtrf : MOP_xvcvtrd; + break; + case PTY_u64: + mOp = !is64BitsFloat ? MOP_xvcvturf : MOP_xvcvturd; + break; + default: + CHECK_FATAL(false, "unexpected type"); + } + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, resOpnd, opnd0)); +} + +void AArch64CGFunc::SelectCvtInt2Float(Operand &resOpnd, Operand &origOpnd0, PrimType toType, PrimType fromType) { + ASSERT((toType == PTY_f32) || (toType == PTY_f64), "unexpected type"); + bool is64BitsFloat = (toType == PTY_f64); + MOperator mOp = 0; + uint32 fsize = GetPrimTypeBitSize(fromType); + + PrimType itype = (GetPrimTypeBitSize(fromType) == k64BitSize) ? (IsSignedInteger(fromType) ? PTY_i64 : PTY_u64) + : (IsSignedInteger(fromType) ? PTY_i32 : PTY_u32); + + Operand *opnd0 = &LoadIntoRegister(origOpnd0, itype); + + /* need extension before cvt */ + ASSERT(opnd0->IsRegister(), "opnd should be a register operand"); + Operand *srcOpnd = opnd0; + if (IsSignedInteger(fromType) && (fsize < k32BitSize)) { + srcOpnd = &CreateRegisterOperandOfType(itype); + mOp = (fsize == k8BitSize) ? MOP_xsxtb32 : MOP_xsxth32; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, *srcOpnd, *opnd0)); + } + + switch (itype) { + case PTY_i32: + mOp = !is64BitsFloat ? MOP_vcvtfr : MOP_vcvtdr; + break; + case PTY_u32: + mOp = !is64BitsFloat ? 
MOP_vcvtufr : MOP_vcvtudr; + break; + case PTY_i64: + mOp = !is64BitsFloat ? MOP_xvcvtfr : MOP_xvcvtdr; + break; + case PTY_u64: + mOp = !is64BitsFloat ? MOP_xvcvtufr : MOP_xvcvtudr; + break; + default: + CHECK_FATAL(false, "unexpected type"); + } + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, resOpnd, *srcOpnd)); +} + +Operand *AArch64CGFunc::SelectRoundLibCall(RoundType roundType, const TypeCvtNode &node, Operand &opnd0) { + PrimType ftype = node.FromType(); + PrimType rtype = node.GetPrimType(); + bool is64Bits = (ftype == PTY_f64); + std::vector opndVec; + RegOperand *resOpnd; + if (is64Bits) { + resOpnd = &GetOrCreatePhysicalRegisterOperand(D0, k64BitSize, kRegTyFloat); + } else { + resOpnd = &GetOrCreatePhysicalRegisterOperand(S0, k32BitSize, kRegTyFloat); + } + opndVec.push_back(resOpnd); + RegOperand ®Opnd0 = LoadIntoRegister(opnd0, ftype); + opndVec.push_back(®Opnd0); + std::string libName; + if (roundType == kCeil) { + libName.assign(is64Bits ? "ceil" : "ceilf"); + } else if (roundType == kFloor) { + libName.assign(is64Bits ? "floor" : "floorf"); + } else { + libName.assign(is64Bits ? "round" : "roundf"); + } + SelectLibCall(libName, opndVec, ftype, rtype); + + return resOpnd; +} + +Operand *AArch64CGFunc::SelectRoundOperator(RoundType roundType, const TypeCvtNode &node, Operand &opnd0) { + PrimType itype = node.GetPrimType(); + if ((mirModule.GetSrcLang() == kSrcLangC) && ((itype == PTY_f64) || (itype == PTY_f32))) { + SelectRoundLibCall(roundType, node, opnd0); + } + PrimType ftype = node.FromType(); + ASSERT(((ftype == PTY_f64) || (ftype == PTY_f32)), "wrong float type"); + bool is64Bits = (ftype == PTY_f64); + RegOperand &resOpnd = CreateRegisterOperandOfType(itype); + RegOperand ®Opnd0 = LoadIntoRegister(opnd0, ftype); + MOperator mop = MOP_undef; + if (roundType == kCeil) { + mop = is64Bits ? MOP_xvcvtps : MOP_vcvtps; + } else if (roundType == kFloor) { + mop = is64Bits ? MOP_xvcvtms : MOP_vcvtms; + } else { + mop = is64Bits ? MOP_xvcvtas : MOP_vcvtas; + } + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mop, resOpnd, regOpnd0)); + return &resOpnd; +} + +Operand *AArch64CGFunc::SelectCeil(TypeCvtNode &node, Operand &opnd0) { + return SelectRoundOperator(kCeil, node, opnd0); +} + +/* float to int floor */ +Operand *AArch64CGFunc::SelectFloor(TypeCvtNode &node, Operand &opnd0) { + return SelectRoundOperator(kFloor, node, opnd0); +} + +Operand *AArch64CGFunc::SelectRound(TypeCvtNode &node, Operand &opnd0) { + return SelectRoundOperator(kRound, node, opnd0); +} + +static bool LIsPrimitivePointer(PrimType ptype) { + return ((PTY_ptr <= ptype) && (ptype <= PTY_a64)); +} + +Operand *AArch64CGFunc::SelectRetype(TypeCvtNode &node, Operand &opnd0) { + PrimType fromType = node.FromType(); + PrimType toType = node.GetPrimType(); + ASSERT(GetPrimTypeSize(fromType) == GetPrimTypeSize(toType), "retype bit widith doesn' match"); + if (LIsPrimitivePointer(fromType) && LIsPrimitivePointer(toType)) { + return &LoadIntoRegister(opnd0, toType); + } + Operand::OperandType opnd0Type = opnd0.GetKind(); + RegOperand *resOpnd = &CreateRegisterOperandOfType(toType); + if (IsPrimitiveInteger(fromType) || IsPrimitiveFloat(fromType)) { + bool isFromInt = IsPrimitiveInteger(fromType); + bool is64Bits = GetPrimTypeBitSize(fromType) == k64BitSize; + PrimType itype = + isFromInt ? ((GetPrimTypeBitSize(fromType) == k64BitSize) ? (IsSignedInteger(fromType) ? PTY_i64 : PTY_u64) + : (IsSignedInteger(fromType) ? PTY_i32 : PTY_u32)) + : (is64Bits ? 
PTY_f64 : PTY_f32); + + /* + * if source operand is in memory, + * simply read it as a value of 'toType 'into the dest operand + * and return + */ + if (opnd0Type == Operand::kOpdMem) { + resOpnd = &SelectCopy(opnd0, toType, toType); + return resOpnd; + } + /* according to aarch64 encoding format, convert int to float expression */ + bool isImm = false; + ImmOperand *imm = static_cast(&opnd0); + uint64 val = static_cast(imm->GetValue()); + uint64 canRepreset = is64Bits ? (val & 0xffffffffffff) : (val & 0x7ffff); + uint32 val1 = is64Bits ? (val >> 61) & 0x3 : (val >> 29) & 0x3; + uint32 val2 = is64Bits ? (val >> 54) & 0xff : (val >> 25) & 0x1f; + bool isSame = is64Bits ? ((val2 == 0) || (val2 == 0xff)) : ((val2 == 0) || (val2 == 0x1f)); + canRepreset = (canRepreset == 0) && ((val1 & 0x1) ^ ((val1 & 0x2) >> 1)) && isSame; + Operand *newOpnd0 = &opnd0; + if (IsPrimitiveInteger(fromType) && IsPrimitiveFloat(toType) && canRepreset) { + uint64 temp1 = is64Bits ? (val >> 63) << 7 : (val >> 31) << 7; + uint64 temp2 = is64Bits ? val >> 48 : val >> 19; + int64 imm8 = (temp2 & 0x7f) | temp1; + newOpnd0 = &CreateImmOperand(imm8, k8BitSize, false, kNotVary, true); + isImm = true; + } else { + newOpnd0 = &LoadIntoRegister(opnd0, itype); + } + uint32 mopFmov = + isImm ? is64Bits ? MOP_xdfmovri : MOP_wsfmovri + : isFromInt ? (is64Bits ? MOP_xvmovdr : MOP_xvmovsr) : (is64Bits ? MOP_xvmovrd : MOP_xvmovrs); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mopFmov, *resOpnd, *newOpnd0)); + return resOpnd; + } else { + CHECK_FATAL(false, "NYI retype"); + } + return nullptr; +} + +void AArch64CGFunc::SelectCvtFloat2Float(Operand &resOpnd, Operand &srcOpnd, PrimType fromType, PrimType toType) { + Operand &opnd0 = LoadIntoRegister(srcOpnd, fromType); + MOperator mOp = 0; + switch (toType) { + case PTY_f32: { + CHECK_FATAL(fromType == PTY_f64, "unexpected cvt from type"); + mOp = MOP_xvcvtfd; + break; + } + case PTY_f64: { + CHECK_FATAL(fromType == PTY_f32, "unexpected cvt from type"); + mOp = MOP_xvcvtdf; + break; + } + default: + CHECK_FATAL(false, "unexpected cvt to type"); + } + + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, resOpnd, opnd0)); +} + +/* + * This should be regarded only as a reference. + * + * C11 specification. + * 6.3.1.3 Signed and unsigned integers + * 1 When a value with integer type is converted to another integer + * type other than _Bool, if the value can be represented by the + * new type, it is unchanged. + * 2 Otherwise, if the new type is unsigned, the value is converted + * by repeatedly adding or subtracting one more than the maximum + * value that can be represented in the new type until the value + * is in the range of the new type.60) + * 3 Otherwise, the new type is signed and the value cannot be + * represented in it; either the result is implementation-defined + * or an implementation-defined signal is raised. 
+ */ +void AArch64CGFunc::SelectCvtInt2Int(const BaseNode *parent, Operand *&resOpnd, Operand *opnd0, PrimType fromType, + PrimType toType) { + uint32 fsize = GetPrimTypeBitSize(fromType); + uint32 tsize = GetPrimTypeBitSize(toType); + bool isExpand = tsize > fsize; + bool is64Bit = (tsize == k64BitSize); + if ((parent != nullptr) && opnd0->IsIntImmediate() && + ((parent->GetOpCode() == OP_band) || (parent->GetOpCode() == OP_bior) || (parent->GetOpCode() == OP_bxor) || + (parent->GetOpCode() == OP_ashr) || (parent->GetOpCode() == OP_lshr) || (parent->GetOpCode() == OP_shl))) { + ImmOperand *simm = static_cast(opnd0); + ASSERT(simm != nullptr, "simm is nullptr in AArch64CGFunc::SelectCvtInt2Int"); + bool isSign = false; + int64 origValue = simm->GetValue(); + int64 newValue = origValue; + int64 signValue = 0; + if (!isExpand) { + /* 64--->32 */ + if (fsize > tsize) { + if (IsSignedInteger(toType)) { + if (origValue < 0) { + signValue = 0xFFFFFFFFFFFFFFFF & (1ULL << static_cast(tsize)); + } + newValue = static_cast(origValue) & ((1ULL << static_cast(tsize)) - 1u) & + static_cast(signValue); + } else { + newValue = static_cast(origValue) & ((1ULL << static_cast(tsize)) - 1u); + } + } + } + if (IsSignedInteger(toType)) { + isSign = true; + } + resOpnd = &static_cast(CreateImmOperand(newValue, GetPrimTypeSize(toType) * kBitsPerByte, isSign)); + return; + } + if (isExpand) { /* Expansion */ + /* if cvt expr's parent is add,and,xor and some other,we can use the imm version */ + PrimType primType = + ((fsize == k64BitSize) ? (IsSignedInteger(fromType) ? PTY_i64 : PTY_u64) : (IsSignedInteger(fromType) ? + PTY_i32 : PTY_u32)); + opnd0 = &LoadIntoRegister(*opnd0, primType); + + if (IsSignedInteger(fromType)) { + ASSERT((is64Bit || (fsize == k8BitSize || fsize == k16BitSize)), "incorrect from size"); + + MOperator mOp = + (is64Bit ? ((fsize == k8BitSize) ? MOP_xsxtb64 : ((fsize == k16BitSize) ? MOP_xsxth64 : MOP_xsxtw64)) + : ((fsize == k8BitSize) ? MOP_xsxtb32 : MOP_xsxth32)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, *resOpnd, *opnd0)); + } else { + /* Unsigned */ + if (is64Bit) { + if (fsize == k8BitSize) { + ImmOperand &immOpnd = CreateImmOperand(0xff, k64BitSize, false); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xandrri13, *resOpnd, *opnd0, immOpnd)); + } else if (fsize == k16BitSize) { + ImmOperand &immOpnd = CreateImmOperand(0xffff, k64BitSize, false); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xandrri13, *resOpnd, *opnd0, immOpnd)); + } else { + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xuxtw64, *resOpnd, *opnd0)); + } + } else { + ASSERT(((fsize == k8BitSize) || (fsize == k16BitSize)), "incorrect from size"); + if (fsize == k8BitSize) { + static_cast(opnd0)->SetValidBitsNum(k8BitSize); + static_cast(resOpnd)->SetValidBitsNum(k8BitSize); + } + if (fromType == PTY_u1) { + static_cast(opnd0)->SetValidBitsNum(1); + static_cast(resOpnd)->SetValidBitsNum(1); + } + GetCurBB()->AppendInsn(GetCG()->BuildInstruction((fsize == k8BitSize) ? MOP_xuxtb32 : MOP_xuxth32, + *resOpnd, *opnd0)); + } + } + } else { /* Same size or truncate */ +#ifdef CNV_OPTIMIZE + /* + * No code needed for aarch64 with same reg. + * Just update regno. + */ + RegOperand *reg = static_cast(resOpnd); + reg->regNo = static_cast(opnd0)->regNo; +#else + /* + * This is not really needed if opnd0 is result from a load. + * Hopefully the FE will get rid of the redundant conversions for loads. + */ + PrimType primType = ((fsize == k64BitSize) ? (IsSignedInteger(fromType) ? 
PTY_i64 : PTY_u64) + : (IsSignedInteger(fromType) ? PTY_i32 : PTY_u32)); + opnd0 = &LoadIntoRegister(*opnd0, primType); + + if (fsize > tsize) { + if (fsize == k64BitSize) { + MOperator mOp = IsSignedInteger(toType) ? MOP_xsbfxrri6i6 : MOP_xubfxrri6i6; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, *resOpnd, *opnd0, + CreateImmOperand(0, k8BitSize, false), + CreateImmOperand(tsize, k8BitSize, false))); + } else { + MOperator mOp = IsSignedInteger(toType) ? MOP_wsbfxrri5i5 : MOP_wubfxrri5i5; + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, *resOpnd, *opnd0, + CreateImmOperand(0, k8BitSize, false), + CreateImmOperand(tsize, k8BitSize, false))); + } + } else { + /* same size, so resOpnd can be set */ + if ((mirModule.IsJavaModule()) || (IsSignedInteger(fromType) == IsSignedInteger(toType)) || + (GetPrimTypeSize(toType) > k4BitSize)) { + AArch64RegOperand *reg = static_cast(resOpnd); + reg->SetRegisterNumber(static_cast(opnd0)->GetRegisterNumber()); + } else if (IsUnsignedInteger(toType)) { + MOperator mop; + switch (toType) { + case PTY_u8: + mop = MOP_xuxtb32; + break; + case PTY_u16: + mop = MOP_xuxth32; + break; + case PTY_u32: + mop = MOP_xuxtw64; + break; + default: + CHECK_FATAL(0, "Unhandled unsigned convert"); + } + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mop, *resOpnd, *opnd0)); + } else { + /* signed target */ + uint32 size = GetPrimTypeSize(toType); + MOperator mop; + switch (toType) { + case PTY_i8: + mop = (size > k4BitSize) ? MOP_xsxtb64 : MOP_xsxtb32; + break; + case PTY_i16: + mop = (size > k4BitSize) ? MOP_xsxth64 : MOP_xsxth32; + break; + case PTY_i32: + mop = MOP_xsxtw64; + break; + default: + CHECK_FATAL(0, "Unhandled unsigned convert"); + } + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mop, *resOpnd, *opnd0)); + } + } +#endif + } +} + +Operand *AArch64CGFunc::SelectCvt(const BaseNode &parent, TypeCvtNode &node, Operand &opnd0) { + PrimType fromType = node.FromType(); + PrimType toType = node.GetPrimType(); + if (fromType == toType) { + return &opnd0; /* noop */ + } + Operand *resOpnd = &static_cast(CreateRegisterOperandOfType(toType)); + if (IsPrimitiveFloat(toType) && IsPrimitiveInteger(fromType)) { + SelectCvtInt2Float(*resOpnd, opnd0, toType, fromType); + } else if (IsPrimitiveFloat(fromType) && IsPrimitiveInteger(toType)) { + SelectCvtFloat2Int(*resOpnd, opnd0, toType, fromType); + } else if (IsPrimitiveInteger(fromType) && IsPrimitiveInteger(toType)) { + SelectCvtInt2Int(&parent, resOpnd, &opnd0, fromType, toType); + } else { /* both are float type */ + SelectCvtFloat2Float(*resOpnd, opnd0, fromType, toType); + } + return resOpnd; +} + +Operand *AArch64CGFunc::SelectTrunc(TypeCvtNode &node, Operand &opnd0) { + PrimType ftype = node.FromType(); + bool is64Bits = (GetPrimTypeBitSize(node.GetPrimType()) == k64BitSize); + PrimType itype = (is64Bits) ? (IsSignedInteger(node.GetPrimType()) ? PTY_i64 : PTY_u64) + : (IsSignedInteger(node.GetPrimType()) ? 
PTY_i32 : PTY_u32); /* promoted type */ + RegOperand &resOpnd = CreateRegisterOperandOfType(itype); + SelectCvtFloat2Int(resOpnd, opnd0, itype, ftype); + return &resOpnd; +} + +void AArch64CGFunc::SelectSelect(Operand &resOpnd, Operand &condOpnd, Operand &trueOpnd, Operand &falseOpnd, + PrimType dtype, PrimType ctype) { + ASSERT(&resOpnd != &condOpnd, "resOpnd cannot be the same as condOpnd"); + Operand &newCondOpnd = LoadIntoRegister(condOpnd, ctype); + Operand &newTrueOpnd = LoadIntoRegister(trueOpnd, dtype); + Operand &newFalseOpnd = LoadIntoRegister(falseOpnd, dtype); + + bool isIntType = IsPrimitiveInteger(dtype); + + SelectAArch64Cmp(newCondOpnd, CreateImmOperand(0, ctype, false), true, GetPrimTypeBitSize(ctype)); + ASSERT((IsPrimitiveInteger(dtype) || IsPrimitiveFloat(dtype)), "unknown type for select"); + Operand &newResOpnd = LoadIntoRegister(resOpnd, dtype); + SelectAArch64Select(newResOpnd, newTrueOpnd, newFalseOpnd, + GetCondOperand(CC_NE), isIntType, GetPrimTypeBitSize(dtype)); +} + +Operand *AArch64CGFunc::SelectSelect(TernaryNode &node, Operand &opnd0, Operand &opnd1, Operand &opnd2) { + PrimType dtype = node.GetPrimType(); + PrimType ctype = node.Opnd(0)->GetPrimType(); + RegOperand &resOpnd = CreateRegisterOperandOfType(dtype); + SelectSelect(resOpnd, opnd0, opnd1, opnd2, dtype, ctype); + return &resOpnd; +} + +/* + * syntax: select <prim-type> (<opnd0>, <opnd1>, <opnd2>) + * <opnd0> must be of integer type. + * <opnd1> and <opnd2> must be of the type given by <prim-type>. + * If <opnd0> is not 0, return <opnd1>. Otherwise, return <opnd2>. + */ +void AArch64CGFunc::SelectAArch64Select(Operand &dest, Operand &o0, Operand &o1, CondOperand &cond, bool isIntType, + uint32 dsize) { + uint32 mOpCode = isIntType ? ((dsize == k64BitSize) ? MOP_xcselrrrc : MOP_wcselrrrc) + : ((dsize == k64BitSize) ? MOP_dcselrrrc + : ((dsize == k32BitSize) ? MOP_scselrrrc : MOP_hcselrrrc)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOpCode, dest, o0, o1, cond)); +} + +void AArch64CGFunc::SelectRangeGoto(RangeGotoNode &rangeGotoNode, Operand &srcOpnd) { + const SmallCaseVector &switchTable = rangeGotoNode.GetRangeGotoTable(); + MIRType *etype = GlobalTables::GetTypeTable().GetTypeFromTyIdx((TyIdx)PTY_a64); + /* + * we store 8-byte displacement ( jump_label - offset_table_address ) + * in the table.
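+ * A rough sketch of the sequence selected below (register names are illustrative;
+ * only the .LB_ label prefix is taken from this function):
+ *   adrp  xbase, .LB_funcN                 // page address of the emitted table
+ *   add   xbase, xbase, :lo12:.LB_funcN
+ *   ldr   xdisp, [xbase, xidx, lsl #3]     // 8-byte displacement for the case index
+ *   add   xtgt, xbase, xdisp               // jump_label = table_address + displacement
+ *   br    xtgt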
Refer to AArch64Emit::Emit() in aarch64emit.cpp + */ + std::vector sizeArray; + sizeArray.emplace_back(switchTable.size()); + MIRArrayType *arrayType = memPool->New(etype->GetTypeIndex(), sizeArray); + MIRAggConst *arrayConst = memPool->New(mirModule, *arrayType); + for (const auto &itPair : switchTable) { + LabelIdx labelIdx = itPair.second; + GetCurBB()->PushBackRangeGotoLabel(labelIdx); + MIRConst *mirConst = memPool->New(labelIdx, GetFunction().GetPuidx(), *etype); + arrayConst->PushBack(mirConst); + } + + MIRSymbol *lblSt = GetFunction().GetSymTab()->CreateSymbol(kScopeLocal); + lblSt->SetStorageClass(kScFstatic); + lblSt->SetSKind(kStConst); + lblSt->SetTyIdx(arrayType->GetTypeIndex()); + lblSt->SetKonst(arrayConst); + std::string lblStr(".LB_"); + MIRSymbol *funcSt = GlobalTables::GetGsymTable().GetSymbolFromStidx(GetFunction().GetStIdx().Idx()); + uint32 labelIdxTmp = GetLabelIdx(); + lblStr.append(funcSt->GetName()).append(std::to_string(labelIdxTmp++)); + SetLabelIdx(labelIdxTmp); + lblSt->SetNameStrIdx(lblStr); + AddEmitSt(*lblSt); + + PrimType itype = rangeGotoNode.Opnd(0)->GetPrimType(); + Operand &opnd0 = LoadIntoRegister(srcOpnd, itype); + + regno_t vRegNO = NewVReg(kRegTyInt, 8u); + RegOperand *addOpnd = &CreateVirtualRegisterOperand(vRegNO); + + int32 minIdx = switchTable[0].first; + SelectAdd(*addOpnd, opnd0, + CreateImmOperand(-minIdx - rangeGotoNode.GetTagOffset(), GetPrimTypeBitSize(itype), true), itype); + + /* contains the index */ + if (addOpnd->GetSize() != GetPrimTypeBitSize(PTY_u64)) { + addOpnd = static_cast(&SelectCopy(*addOpnd, PTY_u64, PTY_u64)); + } + + RegOperand &baseOpnd = CreateRegisterOperandOfType(PTY_u64); + StImmOperand &stOpnd = CreateStImmOperand(*lblSt, 0, 0); + + /* load the address of the switch table */ + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xadrp, baseOpnd, stOpnd)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xadrpl12, baseOpnd, baseOpnd, stOpnd)); + + /* load the displacement into a register by accessing memory at base + index*8 */ + Operand *disp = + memPool->New(AArch64MemOperand::kAddrModeBOrX, k64BitSize, baseOpnd, *addOpnd, k8BitShift); + RegOperand &tgt = CreateRegisterOperandOfType(PTY_a64); + SelectAdd(tgt, baseOpnd, *disp, PTY_u64); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xbr, tgt)); +} + +Operand *AArch64CGFunc::SelectLazyLoad(Operand &opnd0, PrimType primType) { + ASSERT(opnd0.IsRegister(), "wrong type."); + RegOperand &resOpnd = CreateRegisterOperandOfType(primType); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_lazy_ldr, resOpnd, opnd0)); + return &resOpnd; +} + +Operand *AArch64CGFunc::SelectLazyLoadStatic(MIRSymbol &st, int64 offset, PrimType primType) { + StImmOperand &srcOpnd = CreateStImmOperand(st, offset, 0); + RegOperand &resOpnd = CreateRegisterOperandOfType(primType); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_lazy_ldr_static, resOpnd, srcOpnd)); + return &resOpnd; +} + +Operand *AArch64CGFunc::SelectLoadArrayClassCache(MIRSymbol &st, int64 offset, PrimType primType) { + StImmOperand &srcOpnd = CreateStImmOperand(st, offset, 0); + RegOperand &resOpnd = CreateRegisterOperandOfType(primType); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_arrayclass_cache_ldr, resOpnd, srcOpnd)); + return &resOpnd; +} + +Operand *AArch64CGFunc::SelectAlloca(UnaryNode &node, Operand &opnd0) { + ASSERT((node.GetPrimType() == PTY_a64), "wrong type"); + PrimType stype = node.Opnd(0)->GetPrimType(); + Operand *resOpnd = &opnd0; + if (GetPrimTypeBitSize(stype) < 
GetPrimTypeBitSize(PTY_u64)) { + resOpnd = &CreateRegisterOperandOfType(PTY_u64); + SelectCvtInt2Int(nullptr, resOpnd, &opnd0, stype, PTY_u64); + } + + RegOperand &aliOp = CreateRegisterOperandOfType(PTY_u64); + + SelectAdd(aliOp, *resOpnd, CreateImmOperand(kAarch64StackPtrAlignment - 1, k64BitSize, true), PTY_u64); + Operand &shifOpnd = CreateImmOperand(__builtin_ctz(kAarch64StackPtrAlignment), k64BitSize, true); + SelectShift(aliOp, aliOp, shifOpnd, kShiftLright, PTY_u64); + SelectShift(aliOp, aliOp, shifOpnd, kShiftLeft, PTY_u64); + Operand &spOpnd = GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + SelectSub(spOpnd, spOpnd, aliOp, PTY_u64); + int64 argsToStkpassSize = GetMemlayout()->SizeOfArgsToStackPass(); + if (argsToStkpassSize > 0) { + RegOperand &resallo = CreateRegisterOperandOfType(PTY_u64); + SelectAdd(resallo, spOpnd, CreateImmOperand(argsToStkpassSize, k64BitSize, true), PTY_u64); + return &resallo; + } else { + return &SelectCopy(spOpnd, PTY_u64, PTY_u64); + } +} + +Operand *AArch64CGFunc::SelectMalloc(UnaryNode &node, Operand &opnd0) { + PrimType retType = node.GetPrimType(); + ASSERT((retType == PTY_a64), "wrong type"); + + std::vector opndVec; + RegOperand &resOpnd = CreateRegisterOperandOfType(retType); + opndVec.emplace_back(&resOpnd); + opndVec.emplace_back(&opnd0); + /* Use calloc to make sure allocated memory is zero-initialized */ + const std::string &funcName = "calloc"; + PrimType srcPty = PTY_u64; + if (opnd0.GetSize() <= k32BitSize) { + srcPty = PTY_u32; + } + Operand &opnd1 = CreateImmOperand(1, srcPty, false); + opndVec.emplace_back(&opnd1); + SelectLibCall(funcName, opndVec, srcPty, retType); + return &resOpnd; +} + +Operand *AArch64CGFunc::SelectGCMalloc(GCMallocNode &node) { + PrimType retType = node.GetPrimType(); + ASSERT((retType == PTY_a64), "wrong type"); + + /* Get the size and alignment of the type. 
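+ * (The call emitted below is effectively result = MCC_NewObj(size, align), where
+ * size comes from BECommon's type-size table and align is the runtime's fixed
+ * kObjectAlignment.)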
*/ + TyIdx tyIdx = node.GetTyIdx(); + uint64 size = GetBecommon().GetTypeSize(tyIdx); + uint8 align = AArch64RTSupport::kObjectAlignment; + + /* Generate the call to MCC_NewObj */ + Operand &opndSize = CreateImmOperand(size, k64BitSize, false); + Operand &opndAlign = CreateImmOperand(align, k64BitSize, false); + + RegOperand &resOpnd = CreateRegisterOperandOfType(retType); + + std::vector opndVec{ &resOpnd, &opndSize, &opndAlign }; + + const std::string &funcName = "MCC_NewObj"; + SelectLibCall(funcName, opndVec, PTY_u64, retType); + + return &resOpnd; +} + +Operand *AArch64CGFunc::SelectJarrayMalloc(JarrayMallocNode &node, Operand &opnd0) { + PrimType retType = node.GetPrimType(); + ASSERT((retType == PTY_a64), "wrong type"); + + /* Extract jarray type */ + TyIdx tyIdx = node.GetTyIdx(); + MIRType *type = GlobalTables::GetTypeTable().GetTypeFromTyIdx(tyIdx); + ASSERT(type != nullptr, "nullptr check"); + CHECK_FATAL(type->GetKind() == kTypeJArray, "expect MIRJarrayType"); + auto jaryType = static_cast(type); + uint64 fixedSize = AArch64RTSupport::kArrayContentOffset; + uint8 align = AArch64RTSupport::kObjectAlignment; + + MIRType *elemType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(jaryType->GetElemTyIdx()); + PrimType elemPrimType = elemType->GetPrimType(); + uint64 elemSize = GetPrimTypeSize(elemPrimType); + + /* Generate the cal to MCC_NewObj_flexible */ + Operand &opndFixedSize = CreateImmOperand(PTY_u64, fixedSize); + Operand &opndElemSize = CreateImmOperand(PTY_u64, elemSize); + + Operand *opndNElems = &opnd0; + + Operand *opndNElems64 = &static_cast(CreateRegisterOperandOfType(PTY_u64)); + SelectCvtInt2Int(nullptr, opndNElems64, opndNElems, PTY_u32, PTY_u64); + + Operand &opndAlign = CreateImmOperand(PTY_u64, align); + + RegOperand &resOpnd = CreateRegisterOperandOfType(retType); + + std::vector opndVec{ &resOpnd, &opndFixedSize, &opndElemSize, opndNElems64, &opndAlign }; + + const std::string &funcName = "MCC_NewObj_flexible"; + SelectLibCall(funcName, opndVec, PTY_u64, retType); + + /* Generate the store of the object length field */ + MemOperand &opndArrayLengthField = CreateMemOpnd(resOpnd, AArch64RTSupport::kArrayLengthOffset, k4BitSize); + RegOperand *regOpndNElems = &SelectCopy(*opndNElems, PTY_u32, PTY_u32); + ASSERT(regOpndNElems != nullptr, "null ptr check!"); + SelectCopy(opndArrayLengthField, PTY_u32, *regOpndNElems, PTY_u32); + + return &resOpnd; +} + +Operand &AArch64CGFunc::GetZeroOpnd(uint32 size) { + return AArch64RegOperand::GetZeroRegister(size <= k32BitSize ? k32BitSize : k64BitSize); +} + +bool AArch64CGFunc::IsFrameReg(const RegOperand &opnd) const { + if (opnd.GetRegisterNumber() == RFP) { + return true; + } else { + return false; + } +} + +/* + * This function returns true to indicate that the clean up code needs to be generated, + * otherwise it does not need. In GCOnly mode, it always returns false. + */ +bool AArch64CGFunc::NeedCleanup() { + if (CGOptions::IsGCOnly()) { + return false; + } + AArch64MemLayout *layout = static_cast(GetMemlayout()); + if (layout->GetSizeOfRefLocals() > 0) { + return true; + } + for (uint32 i = 0; i < GetFunction().GetFormalCount(); i++) { + TypeAttrs ta = GetFunction().GetNthParamAttr(i); + if (ta.GetAttr(ATTR_localrefvar)) { + return true; + } + } + + return false; +} + +/* + * bb must be the cleanup bb. + * this function must be invoked before register allocation. + * extended epilogue is specific for fast exception handling and is made up of + * clean up code and epilogue. 
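+ * the clean up part emitted here is roughly (a sketch, the exact operands are
+ * built by the code below):
+ *   stp x0, x1, [sp, #-16]!    // preserve the return-value registers
+ *   ...call MCC_CleanupLocalStackRef...
+ *   ldp x0, x1, [sp], #16      // restore them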
+ * clean up code is generated here while epilogue is generated in GeneratePrologEpilog() + */ +void AArch64CGFunc::GenerateCleanupCodeForExtEpilog(BB &bb) { + ASSERT(GetLastBB()->GetPrev()->GetFirstStmt() == GetCleanupLabel(), "must be"); + + if (NeedCleanup()) { + /* this is necessary for code insertion. */ + SetCurBB(bb); + + AArch64RegOperand ®Opnd0 = + GetOrCreatePhysicalRegisterOperand(R0, kSizeOfPtr * kBitsPerByte, GetRegTyFromPrimTy(PTY_a64)); + AArch64RegOperand ®Opnd1 = + GetOrCreatePhysicalRegisterOperand(R1, kSizeOfPtr * kBitsPerByte, GetRegTyFromPrimTy(PTY_a64)); + /* allocate 16 bytes to store reg0 and reg1 (each reg has 8 bytes) */ + AArch64MemOperand &frameAlloc = CreateCallFrameOperand(-16, kSizeOfPtr * kBitsPerByte); + Insn &allocInsn = GetCG()->BuildInstruction(MOP_xstp, regOpnd0, regOpnd1, frameAlloc); + allocInsn.SetDoNotRemove(true); + AppendInstructionTo(allocInsn, *this); + + /* invoke MCC_CleanupLocalStackRef(). */ + HandleRCCall(false); + /* deallocate 16 bytes which used to store reg0 and reg1 */ + AArch64MemOperand &frameDealloc = CreateCallFrameOperand(16, kSizeOfPtr * kBitsPerByte); + GenRetCleanup(cleanEANode, true); + Insn &deallocInsn = GetCG()->BuildInstruction(MOP_xldp, regOpnd0, regOpnd1, frameDealloc); + deallocInsn.SetDoNotRemove(true); + AppendInstructionTo(deallocInsn, *this); + /* Update cleanupbb since bb may have been splitted */ + SetCleanupBB(*GetCurBB()); + } +} + +/* + * bb must be the cleanup bb. + * this function must be invoked before register allocation. + */ +void AArch64CGFunc::GenerateCleanupCode(BB &bb) { + ASSERT(GetLastBB()->GetPrev()->GetFirstStmt() == GetCleanupLabel(), "must be"); + if (!NeedCleanup()) { + return; + } + + /* this is necessary for code insertion. */ + SetCurBB(bb); + + /* R0 is lived-in for clean-up code, save R0 before invocation */ + AArch64RegOperand &livein = GetOrCreatePhysicalRegisterOperand(R0, k64BitSize, GetRegTyFromPrimTy(PTY_a64)); + + if (!GetCG()->GenLocalRC()) { + /* by pass local RC operations. */ + } else if (Globals::GetInstance()->GetOptimLevel() > 0) { + regno_t vreg = NewVReg(GetRegTyFromPrimTy(PTY_a64), GetPrimTypeSize(PTY_a64)); + RegOperand &backupRegOp = CreateVirtualRegisterOperand(vreg); + backupRegOp.SetRegNotBBLocal(); + SelectCopy(backupRegOp, PTY_a64, livein, PTY_a64); + + /* invoke MCC_CleanupLocalStackRef(). */ + HandleRCCall(false); + SelectCopy(livein, PTY_a64, backupRegOp, PTY_a64); + } else { + /* + * Register Allocation for O0 can not handle this case, so use a callee saved register directly. + * If yieldpoint is enabled, we use R20 instead R19. + */ + AArch64reg backupRegNO = GetCG()->GenYieldPoint() ? R20 : R19; + RegOperand &backupRegOp = GetOrCreatePhysicalRegisterOperand(backupRegNO, k64BitSize, GetRegTyFromPrimTy(PTY_a64)); + SelectCopy(backupRegOp, PTY_a64, livein, PTY_a64); + /* invoke MCC_CleanupLocalStackRef(). */ + HandleRCCall(false); + SelectCopy(livein, PTY_a64, backupRegOp, PTY_a64); + } + + /* invoke _Unwind_Resume */ + std::string funcName("_Unwind_Resume"); + MIRSymbol *sym = GlobalTables::GetGsymTable().CreateSymbol(kScopeGlobal); + sym->SetNameStrIdx(funcName); + sym->SetStorageClass(kScText); + sym->SetSKind(kStFunc); + AArch64ListOperand *srcOpnds = memPool->New(*GetFuncScopeAllocator()); + srcOpnds->PushOpnd(livein); + AppendCall(*sym, *srcOpnds); + /* + * this instruction is unreachable, but we need it as the return address of previous + * "bl _Unwind_Resume" for stack unwinding. 
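+ * (the unwinder looks up its tables by the return address, which points just past
+ * the bl, so an instruction must be present at that address even though control
+ * never reaches it)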
+ */ + Insn &nop = GetCG()->BuildInstruction(MOP_xblr, livein, *srcOpnds); + GetCurBB()->AppendInsn(nop); + GetCurBB()->SetHasCall(); + + /* Update cleanupbb since bb may have been splitted */ + SetCleanupBB(*GetCurBB()); +} + +/* if offset < 0, allocation; otherwise, deallocation */ +AArch64MemOperand &AArch64CGFunc::CreateCallFrameOperand(int32 offset, int32 size) { + return *memPool->New(RSP, offset, size, + (offset < 0) ? AArch64MemOperand::kPreIndex : AArch64MemOperand::kPostIndex); +} + +AArch64CGFunc::MovkLslOperandArray AArch64CGFunc::movkLslOperands = { + LogicalShiftLeftOperand(0, 4), LogicalShiftLeftOperand(16, 4), + LogicalShiftLeftOperand(static_cast(-1), 0), /* invalid entry */ + LogicalShiftLeftOperand(static_cast(-1), 0), /* invalid entry */ + LogicalShiftLeftOperand(0, 6), LogicalShiftLeftOperand(16, 6), + LogicalShiftLeftOperand(32, 6), LogicalShiftLeftOperand(48, 6), +}; + +/* kShiftAmount12 = 12, less than 16, use 4 bit to store, bitLen is 4 */ +LogicalShiftLeftOperand AArch64CGFunc::addSubLslOperand(kShiftAmount12, 4); + +AArch64MemOperand &AArch64CGFunc::CreateStkTopOpnd(int32 offset, int32 size) { + return *memPool->New(RFP, offset, size); +} + +void AArch64CGFunc::GenSaveMethodInfoCode(BB &bb) { + if (GetCG()->UseFastUnwind()) { + BB *formerCurBB = GetCurBB(); + GetDummyBB()->ClearInsns(); + SetCurBB(*GetDummyBB()); + /* + * FUNCATTR_bridge for function: Ljava_2Flang_2FString_3B_7CcompareTo_7C_28Ljava_2Flang_2FObject_3B_29I, to + * exclude this funciton this function is a bridge function generated for Java Genetic + */ + if ((GetFunction().GetAttr(FUNCATTR_native) || GetFunction().GetAttr(FUNCATTR_fast_native)) && + !GetFunction().GetAttr(FUNCATTR_critical_native) && !GetFunction().GetAttr(FUNCATTR_bridge)) { + RegOperand &fpReg = GetOrCreatePhysicalRegisterOperand(RFP, kSizeOfPtr * kBitsPerByte, kRegTyInt); + + AArch64ListOperand *srcOpnds = memPool->New(*GetFuncScopeAllocator()); + AArch64RegOperand &parmRegOpnd1 = GetOrCreatePhysicalRegisterOperand(R0, k64BitSize, kRegTyInt); + srcOpnds->PushOpnd(parmRegOpnd1); + Operand &immOpnd = CreateImmOperand(0, k64BitSize, false); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xadri64, parmRegOpnd1, immOpnd)); + AArch64RegOperand &parmRegOpnd2 = GetOrCreatePhysicalRegisterOperand(R1, k64BitSize, kRegTyInt); + srcOpnds->PushOpnd(parmRegOpnd2); + SelectCopy(parmRegOpnd2, PTY_a64, fpReg, PTY_a64); + + MIRSymbol *sym = GlobalTables::GetGsymTable().CreateSymbol(kScopeGlobal); + std::string funcName("MCC_SetRiskyUnwindContext"); + sym->SetNameStrIdx(funcName); + + sym->SetStorageClass(kScText); + sym->SetSKind(kStFunc); + AppendCall(*sym, *srcOpnds); + bb.SetHasCall(); + } + + bb.InsertAtBeginning(*GetDummyBB()); + SetCurBB(*formerCurBB); + } +} + +bool AArch64CGFunc::HasStackLoadStore() { + FOR_ALL_BB(bb, this) { + FOR_BB_INSNS(insn, bb) { + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + Operand *base = memOpnd.GetBaseRegister(); + + if ((base != nullptr) && base->IsRegister()) { + RegOperand *regOpnd = static_cast(base); + RegType regType = regOpnd->GetRegisterType(); + uint32 regNO = regOpnd->GetRegisterNumber(); + if (((regType != kRegTyCc) && ((regNO == R29) || (regNO == RSP))) || (regType == kRegTyVary)) { + return true; + } + } + } + } + } + } + return false; +} + +void AArch64CGFunc::GenerateYieldpoint(BB &bb) { + /* ldr wzr, [RYP] # RYP hold address of the polling 
page. */ + auto &wzr = AArch64RegOperand::Get32bitZeroRegister(); + auto &pollingPage = CreateMemOpnd(RYP, 0, k32BitSize); + auto &yieldPoint = GetCG()->BuildInstruction(MOP_wldr, wzr, pollingPage); + if (GetCG()->GenerateVerboseCG()) { + yieldPoint.SetComment("yieldpoint"); + } + bb.AppendInsn(yieldPoint); +} + +Operand &AArch64CGFunc::ProcessReturnReg(PrimType primType, int32 sReg) { + return GetTargetRetOperand(primType, sReg); +} + +Operand &AArch64CGFunc::GetTargetRetOperand(PrimType primType, int32 sReg) { + uint32 bitSize = GetPrimTypeBitSize(primType) < k32BitSize ? k32BitSize : GetPrimTypeBitSize(primType); + AArch64reg pReg; + if (sReg < 0) { + return GetOrCreatePhysicalRegisterOperand(IsPrimitiveFloat(primType) ? S0 : R0, bitSize, + GetRegTyFromPrimTy(primType)); + } else { + switch (sReg) { + case kSregRetval0: + pReg = IsPrimitiveFloat(primType) ? S0 : R0; + break; + case kSregRetval1: + pReg = R1; + break; + default: + pReg = RLAST_INT_REG; + ASSERT(0, "GetTargetRetOperand: NYI"); + } + return GetOrCreatePhysicalRegisterOperand(pReg, bitSize, GetRegTyFromPrimTy(primType)); + } +} + +RegOperand &AArch64CGFunc::CreateRegisterOperandOfType(PrimType primType) { + RegType regType = GetRegTyFromPrimTy(primType); + uint32 byteLength = GetPrimTypeSize(primType); + return CreateRegisterOperandOfType(regType, byteLength); +} + +RegOperand &AArch64CGFunc::CreateRegisterOperandOfType(RegType regty, uint32 byteLen) { + /* BUG: if half-precision floating point operations are supported? */ + if (byteLen < k4ByteSize) { + byteLen = k4ByteSize; /* AArch64 has 32-bit and 64-bit registers only */ + } + regno_t vRegNO = NewVReg(regty, byteLen); + return CreateVirtualRegisterOperand(vRegNO); +} + +RegOperand &AArch64CGFunc::CreateRflagOperand() { + /* AArch64 has Status register that is 32-bit wide. */ + regno_t vRegNO = NewVRflag(); + return CreateVirtualRegisterOperand(vRegNO); +} + +void AArch64CGFunc::MergeReturn() { + ASSERT(GetCurBB()->GetPrev()->GetFirstStmt() == GetCleanupLabel(), "must be"); + + uint32 exitBBSize = GetExitBBsVec().size(); + if (exitBBSize == 0) { + return; + } + if ((exitBBSize == 1) && GetExitBB(0) == GetCurBB()) { + return; + } + if (exitBBSize == 1) { + BB *onlyExitBB = GetExitBB(0); + BB *onlyExitBBNext = onlyExitBB->GetNext(); + StmtNode *stmt = onlyExitBBNext->GetFirstStmt(); + /* only deal with the return_BB in the middle */ + if (stmt != GetCleanupLabel()) { + LabelIdx labidx = CreateLabel(); + BB *retBB = CreateNewBB(labidx, onlyExitBB->IsUnreachable(), BB::kBBReturn, onlyExitBB->GetFrequency()); + onlyExitBB->AppendBB(*retBB); + /* modify the original return BB. */ + ASSERT(onlyExitBB->GetKind() == BB::kBBReturn, "Error: suppose to merge multi return bb"); + onlyExitBB->SetKind(BB::kBBFallthru); + + GetExitBBsVec().pop_back(); + GetExitBBsVec().emplace_back(retBB); + return; + } + } + + LabelIdx labidx = CreateLabel(); + LabelOperand &targetOpnd = GetOrCreateLabelOperand(labidx); + uint32 freq = 0; + for (auto *tmpBB : GetExitBBsVec()) { + ASSERT(tmpBB->GetKind() == BB::kBBReturn, "Error: suppose to merge multi return bb"); + tmpBB->SetKind(BB::kBBGoto); + tmpBB->AppendInsn(GetCG()->BuildInstruction(MOP_xuncond, targetOpnd)); + freq += tmpBB->GetFrequency(); + } + BB *retBB = CreateNewBB(labidx, false, BB::kBBReturn, freq); + GetCleanupBB()->PrependBB(*retBB); + + GetExitBBsVec().clear(); + GetExitBBsVec().emplace_back(retBB); +} + +void AArch64CGFunc::HandleRetCleanup(NaryStmtNode &retNode) { + if (!GetCG()->GenLocalRC()) { + /* handle local rc is disabled. 
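+ * (when local RC is enabled, the loop below walks backwards from the return
+ * statement looking for the INTRN_MPL_CLEANUP_LOCALREFVARS(_SKIP) intrinsic and
+ * emits the cleanup call it describes)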
*/ + return; + } + + Opcode ops[11] = { OP_label, OP_goto, OP_brfalse, OP_brtrue, OP_return, OP_call, + OP_icall, OP_rangegoto, OP_catch, OP_try, OP_endtry }; + std::set branchOp(ops, ops + 11); + + /* get cleanup intrinsic */ + bool found = false; + StmtNode *cleanupNode = retNode.GetPrev(); + cleanEANode = nullptr; + while (cleanupNode != nullptr) { + if (branchOp.find(cleanupNode->GetOpCode()) != branchOp.end()) { + if (cleanupNode->GetOpCode() == OP_call) { + CallNode *callNode = static_cast(cleanupNode); + MIRFunction *fn = GlobalTables::GetFunctionTable().GetFunctionFromPuidx(callNode->GetPUIdx()); + MIRSymbol *fsym = GetFunction().GetLocalOrGlobalSymbol(fn->GetStIdx(), false); + if ((fsym->GetName() == "MCC_DecRef_NaiveRCFast") || (fsym->GetName() == "MCC_IncRef_NaiveRCFast") || + (fsym->GetName() == "MCC_IncDecRef_NaiveRCFast") || (fsym->GetName() == "MCC_LoadRefStatic") || + (fsym->GetName() == "MCC_LoadRefField") || (fsym->GetName() == "MCC_LoadReferentField") || + (fsym->GetName() == "MCC_LoadRefField_NaiveRCFast") || (fsym->GetName() == "MCC_LoadVolatileField") || + (fsym->GetName() == "MCC_LoadVolatileStaticField") || (fsym->GetName() == "MCC_LoadWeakField") || + (fsym->GetName() == "MCC_CheckObjMem")) { + cleanupNode = cleanupNode->GetPrev(); + continue; + } else { + break; + } + } else { + break; + } + } + + if (OP_intrinsiccall == cleanupNode->GetOpCode()) { + IntrinsiccallNode *tempNode = static_cast(cleanupNode); + if ((tempNode->GetIntrinsic() == INTRN_MPL_CLEANUP_LOCALREFVARS) || + (tempNode->GetIntrinsic() == INTRN_MPL_CLEANUP_LOCALREFVARS_SKIP)) { + GenRetCleanup(tempNode); + if (cleanEANode != nullptr) { + GenRetCleanup(cleanEANode, true); + } + found = true; + break; + } + if (tempNode->GetIntrinsic() == INTRN_MPL_CLEANUP_NORETESCOBJS) { + cleanEANode = tempNode; + } + } + cleanupNode = cleanupNode->GetPrev(); + } + + if (!found) { + MIRSymbol *retRef = nullptr; + if (retNode.NumOpnds() != 0) { + retRef = GetRetRefSymbol(*static_cast(retNode).Opnd(0)); + } + HandleRCCall(false, retRef); + } +} + +bool AArch64CGFunc::GenRetCleanup(const IntrinsiccallNode *cleanupNode, bool forEA) { +#undef CC_DEBUG_INFO + +#ifdef CC_DEBUG_INFO + LogInfo::MapleLogger() << "==============" << GetFunction().GetName() << "==============" << '\n'; +#endif + + if (cleanupNode == nullptr) { + return false; + } + + int32 minByteOffset = INT_MAX; + int32 maxByteOffset = 0; + + int32 skipIndex = -1; + MIRSymbol *skipSym = nullptr; + size_t refSymNum = 0; + if (cleanupNode->GetIntrinsic() == INTRN_MPL_CLEANUP_LOCALREFVARS) { + refSymNum = cleanupNode->GetNopndSize(); + if (refSymNum < 1) { + return true; + } + } else if (cleanupNode->GetIntrinsic() == INTRN_MPL_CLEANUP_LOCALREFVARS_SKIP) { + refSymNum = cleanupNode->GetNopndSize(); + /* refSymNum == 0, no local refvars; refSymNum == 1 and cleanup skip, so nothing to do */ + if (refSymNum < 2) { + return true; + } + BaseNode *skipExpr = cleanupNode->Opnd(refSymNum - 1); + + CHECK_FATAL(skipExpr->GetOpCode() == OP_dread, "should be dread"); + DreadNode *refNode = static_cast(skipExpr); + skipSym = GetFunction().GetLocalOrGlobalSymbol(refNode->GetStIdx()); + + refSymNum -= 1; + } else if (cleanupNode->GetIntrinsic() == INTRN_MPL_CLEANUP_NORETESCOBJS) { + refSymNum = cleanupNode->GetNopndSize(); + /* the number of operands of intrinsic call INTRN_MPL_CLEANUP_NORETESCOBJS must be more than 1 */ + if (refSymNum < 2) { + return true; + } + BaseNode *skipexpr = cleanupNode->Opnd(0); + CHECK_FATAL(skipexpr->GetOpCode() == OP_dread, "should be dread"); + 
DreadNode *refnode = static_cast(skipexpr); + skipSym = GetFunction().GetLocalOrGlobalSymbol(refnode->GetStIdx()); + } + + /* now compute the offset range */ + std::vector offsets; + AArch64MemLayout *memLayout = static_cast(this->GetMemlayout()); + for (size_t i = 0; i < refSymNum; ++i) { + BaseNode *argExpr = cleanupNode->Opnd(i); + CHECK_FATAL(argExpr->GetOpCode() == OP_dread, "should be dread"); + DreadNode *refNode = static_cast(argExpr); + MIRSymbol *refSymbol = GetFunction().GetLocalOrGlobalSymbol(refNode->GetStIdx()); + if (memLayout->GetSymAllocTable().size() <= refSymbol->GetStIndex()) { + ERR(kLncErr, "access memLayout->GetSymAllocTable() failed"); + return false; + } + AArch64SymbolAlloc *symLoc = + static_cast(memLayout->GetSymAllocInfo(refSymbol->GetStIndex())); + int32 tempOffset = GetBaseOffset(*symLoc); + offsets.emplace_back(tempOffset); +#ifdef CC_DEBUG_INFO + LogInfo::MapleLogger() << "refsym " << refSymbol->GetName() << " offset " << tempOffset << '\n'; +#endif + minByteOffset = (minByteOffset > tempOffset) ? tempOffset : minByteOffset; + maxByteOffset = (maxByteOffset < tempOffset) ? tempOffset : maxByteOffset; + } + + /* get the skip offset */ + int32 skipOffset = -1; + if (skipSym != nullptr) { + AArch64SymbolAlloc *symLoc = static_cast(memLayout->GetSymAllocInfo(skipSym->GetStIndex())); + CHECK_FATAL(GetBaseOffset(*symLoc) < std::numeric_limits::max(), "out of range"); + skipOffset = GetBaseOffset(*symLoc); + offsets.emplace_back(skipOffset); + +#ifdef CC_DEBUG_INFO + LogInfo::MapleLogger() << "skip " << skipSym->GetName() << " offset " << skipOffset << '\n'; +#endif + + skipIndex = symLoc->GetOffset() / kOffsetAlign; + } + + /* call runtime cleanup */ + if (minByteOffset < INT_MAX) { + int32 refLocBase = memLayout->GetRefLocBaseLoc(); + uint32 refNum = memLayout->GetSizeOfRefLocals() / kOffsetAlign; + CHECK_FATAL((refLocBase + (refNum - 1) * kIntregBytelen) < std::numeric_limits::max(), "out of range"); + int32 refLocEnd = refLocBase + (refNum - 1) * kIntregBytelen; + int32 realMin = minByteOffset < refLocBase ? refLocBase : minByteOffset; + int32 realMax = maxByteOffset > refLocEnd ? refLocEnd : maxByteOffset; + if (forEA) { + std::sort(offsets.begin(), offsets.end()); + int32 prev = offsets[0]; + for (size_t i = 1; i < offsets.size(); i++) { + CHECK_FATAL((offsets[i] == prev) || ((offsets[i] - prev) == kIntregBytelen), "must be"); + prev = offsets[i]; + } + CHECK_FATAL((refLocBase - prev) == kIntregBytelen, "must be"); + realMin = minByteOffset; + realMax = maxByteOffset; + } +#ifdef CC_DEBUG_INFO + LogInfo::MapleLogger() << " realMin " << realMin << " realMax " << realMax << '\n'; +#endif + if (realMax < realMin) { + /* maybe there is a cleanup intrinsic bug, use CHECK_FATAL instead? 
*/ + CHECK_FATAL(false, "must be"); + } + + /* optimization for little slot cleanup */ + if (realMax == realMin && !forEA) { + RegOperand &phyOpnd = GetOrCreatePhysicalRegisterOperand(R0, k64BitSize, GetRegTyFromPrimTy(PTY_a64)); + Operand &stackLoc = CreateStkTopOpnd(realMin, kSizeOfPtr * kBitsPerByte); + Insn &ldrInsn = GetCG()->BuildInstruction(PickLdInsn(k64BitSize, PTY_a64), phyOpnd, stackLoc); + GetCurBB()->AppendInsn(ldrInsn); + + AArch64ListOperand *srcOpnds = memPool->New(*GetFuncScopeAllocator()); + srcOpnds->PushOpnd(phyOpnd); + MIRSymbol *callSym = GlobalTables::GetGsymTable().CreateSymbol(kScopeGlobal); + std::string funcName("MCC_DecRef_NaiveRCFast"); + callSym->SetNameStrIdx(funcName); + callSym->SetStorageClass(kScText); + callSym->SetSKind(kStFunc); + Insn &callInsn = AppendCall(*callSym, *srcOpnds, true); + static_cast(callInsn).SetRefSkipIndex(skipIndex); + GetCurBB()->SetHasCall(); + /* because of return stmt is often the last stmt */ + GetCurBB()->SetFrequency(frequency); + + return true; + } + AArch64ListOperand *srcOpnds = memPool->New(*GetFuncScopeAllocator()); + + AArch64ImmOperand &beginOpnd = CreateImmOperand(realMin, k64BitSize, true); + regno_t vRegNO0 = NewVReg(GetRegTyFromPrimTy(PTY_a64), GetPrimTypeSize(PTY_a64)); + RegOperand &vReg0 = CreateVirtualRegisterOperand(vRegNO0); + RegOperand &fpOpnd = GetOrCreateStackBaseRegOperand(); + SelectAdd(vReg0, fpOpnd, beginOpnd, PTY_i64); + + AArch64RegOperand &parmRegOpnd1 = GetOrCreatePhysicalRegisterOperand(R0, k64BitSize, GetRegTyFromPrimTy(PTY_a64)); + srcOpnds->PushOpnd(parmRegOpnd1); + SelectCopy(parmRegOpnd1, PTY_a64, vReg0, PTY_a64); + + uint32 realRefNum = (realMax - realMin) / kOffsetAlign + 1; + + AArch64ImmOperand &countOpnd = CreateImmOperand(realRefNum, k64BitSize, true); + + AArch64RegOperand &parmRegOpnd2 = GetOrCreatePhysicalRegisterOperand(R1, k64BitSize, GetRegTyFromPrimTy(PTY_a64)); + srcOpnds->PushOpnd(parmRegOpnd2); + SelectCopyImm(parmRegOpnd2, countOpnd, PTY_i64); + + MIRSymbol *funcSym = GlobalTables::GetGsymTable().CreateSymbol(kScopeGlobal); + if ((skipSym != nullptr) && (skipOffset >= realMin) && (skipOffset <= realMax)) { + /* call cleanupskip */ + uint32 stOffset = (skipOffset - realMin) / kOffsetAlign; + AArch64ImmOperand &retLoc = CreateImmOperand(stOffset, k64BitSize, true); + + AArch64RegOperand &parmRegOpnd3 = GetOrCreatePhysicalRegisterOperand(R2, k64BitSize, GetRegTyFromPrimTy(PTY_a64)); + srcOpnds->PushOpnd(parmRegOpnd3); + SelectCopyImm(parmRegOpnd3, retLoc, PTY_i64); + + std::string funcName; + if (forEA) { + funcName = "MCC_CleanupNonRetEscObj"; + } else { + funcName = "MCC_CleanupLocalStackRefSkip_NaiveRCFast"; + } + funcSym->SetNameStrIdx(funcName); +#ifdef CC_DEBUG_INFO + LogInfo::MapleLogger() << "num " << real_ref_num << " skip loc " << stOffset << '\n'; +#endif + } else { + /* call cleanup */ + CHECK_FATAL(!forEA, "must be"); + std::string funcName("MCC_CleanupLocalStackRef_NaiveRCFast"); + funcSym->SetNameStrIdx(funcName); +#ifdef CC_DEBUG_INFO + LogInfo::MapleLogger() << "num " << real_ref_num << '\n'; +#endif + } + + funcSym->SetStorageClass(kScText); + funcSym->SetSKind(kStFunc); + Insn &callInsn = AppendCall(*funcSym, *srcOpnds, true); + static_cast(callInsn).SetRefSkipIndex(skipIndex); + GetCurBB()->SetHasCall(); + GetCurBB()->SetFrequency(frequency); + } + return true; +} + +RegOperand &AArch64CGFunc::CreateVirtualRegisterOperand(regno_t vRegNO) { + ASSERT((vRegOperandTable.find(vRegNO) == vRegOperandTable.end()) || IsVRegNOForPseudoRegister(vRegNO), ""); + uint8 
bitSize = static_cast((static_cast(vRegTable[vRegNO].GetSize())) * kBitsPerByte); + RegOperand *res = memPool->New(vRegNO, bitSize, vRegTable.at(vRegNO).GetType()); + vRegOperandTable[vRegNO] = res; + return *res; +} + +RegOperand &AArch64CGFunc::GetOrCreateVirtualRegisterOperand(regno_t vRegNO) { + auto it = vRegOperandTable.find(vRegNO); + return (it != vRegOperandTable.end()) ? *(it->second) : CreateVirtualRegisterOperand(vRegNO); +} + +/* + * Traverse all call insn to determine return type of it + * If the following insn is mov/str/blr and use R0/V0, it means the call insn have reture value + */ +void AArch64CGFunc::DetermineReturnTypeofCall() { + FOR_ALL_BB(bb, this) { + if (bb->IsUnreachable() || !bb->HasCall()) { + continue; + } + FOR_BB_INSNS(insn, bb) { + if (!insn->IsCall()) { + continue; + } + Insn *nextInsn = insn->GetNextMachineInsn(); + if (nextInsn == nullptr) { + continue; + } + if ((nextInsn->IsMove() && nextInsn->GetOperand(kInsnSecondOpnd).IsRegister()) || + nextInsn->IsStore() || + (nextInsn->IsCall() && nextInsn->GetOperand(kInsnFirstOpnd).IsRegister())) { + auto *srcOpnd = static_cast(nextInsn->GetOpnd(kInsnFirstOpnd)); + CHECK_FATAL(srcOpnd != nullptr, "nullptr"); + if (!srcOpnd->IsPhysicalRegister()) { + continue; + } + if (srcOpnd->GetRegisterNumber() == R0) { + insn->SetRetType(Insn::kRegInt); + continue; + } + if (srcOpnd->GetRegisterNumber() == V0) { + insn->SetRetType(Insn::kRegFloat); + } + } + } + } +} + +void AArch64CGFunc::HandleRCCall(bool begin, const MIRSymbol *retRef) { + if (!GetCG()->GenLocalRC() && !begin) { + /* handle local rc is disabled. */ + return; + } + + AArch64MemLayout *memLayout = static_cast(this->GetMemlayout()); + int32 refNum = memLayout->GetSizeOfRefLocals() / kOffsetAlign; + if (!refNum) { + if (begin) { + GenerateYieldpoint(*GetCurBB()); + yieldPointInsn = GetCurBB()->GetLastInsn(); + } + return; + } + + /* no MCC_CleanupLocalStackRefSkip when ret_ref is the only ref symbol */ + if ((refNum == 1) && (retRef != nullptr)) { + if (begin) { + GenerateYieldpoint(*GetCurBB()); + yieldPointInsn = GetCurBB()->GetLastInsn(); + } + return; + } + CHECK_FATAL(refNum < 0xFFFF, "not enough room for size."); + int32 refLocBase = memLayout->GetRefLocBaseLoc(); + CHECK_FATAL((refLocBase >= 0) && (refLocBase < 0xFFFF), "not enough room for offset."); + int32 formalRef = 0; + /* avoid store zero to formal localrefvars. */ + if (begin) { + for (uint32 i = 0; i < GetFunction().GetFormalCount(); ++i) { + if (GetFunction().GetNthParamAttr(i).GetAttr(ATTR_localrefvar)) { + refNum--; + formalRef++; + } + } + } + /* + * if the number of local refvar is less than 12, use stp or str to init local refvar + * else call function MCC_InitializeLocalStackRef to init. 
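+ * e.g. with three slots and no formal localrefvars the inline form is roughly:
+ *   stp xzr, xzr, [x29, #refLocBase]
+ *   str xzr, [x29, #refLocBase + 16]
+ * (a sketch; the real offsets are taken from the memory layout queried below)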
+ */ + if (begin && (refNum <= kRefNum12) && ((refLocBase + kIntregBytelen * (refNum - 1)) < kStpLdpImm64UpperBound)) { + int32 pairNum = refNum / kDivide2; + int32 singleNum = refNum % kDivide2; + const int32 pairRefBytes = 16; /* the size of each pair of ref is 16 bytes */ + int32 ind = 0; + while (ind < pairNum) { + int32 offset = memLayout->GetRefLocBaseLoc() + kIntregBytelen * formalRef + pairRefBytes * ind; + Operand &zeroOp = GetZeroOpnd(k64BitSize); + Operand &stackLoc = CreateStkTopOpnd(offset, kSizeOfPtr * kBitsPerByte); + Insn &setInc = GetCG()->BuildInstruction(MOP_xstp, zeroOp, zeroOp, stackLoc); + GetCurBB()->AppendInsn(setInc); + ind++; + } + if (singleNum > 0) { + int32 offset = memLayout->GetRefLocBaseLoc() + kIntregBytelen * formalRef + kIntregBytelen * (refNum - 1); + Operand &zeroOp = GetZeroOpnd(k64BitSize); + Operand &stackLoc = CreateStkTopOpnd(offset, kSizeOfPtr * kBitsPerByte); + Insn &setInc = GetCG()->BuildInstruction(MOP_xstr, zeroOp, stackLoc); + GetCurBB()->AppendInsn(setInc); + } + /* Insert Yield Point just after localrefvar are initialized. */ + GenerateYieldpoint(*GetCurBB()); + yieldPointInsn = GetCurBB()->GetLastInsn(); + return; + } + + /* refNum is 1 and refvar is not returned, this refvar need to call MCC_DecRef_NaiveRCFast. */ + if ((refNum == 1) && !begin && (retRef == nullptr)) { + RegOperand &phyOpnd = GetOrCreatePhysicalRegisterOperand(R0, k64BitSize, GetRegTyFromPrimTy(PTY_a64)); + Operand &stackLoc = CreateStkTopOpnd(memLayout->GetRefLocBaseLoc(), kSizeOfPtr * kBitsPerByte); + Insn &ldrInsn = GetCG()->BuildInstruction(PickLdInsn(k64BitSize, PTY_a64), phyOpnd, stackLoc); + GetCurBB()->AppendInsn(ldrInsn); + + AArch64ListOperand *srcOpnds = memPool->New(*GetFuncScopeAllocator()); + srcOpnds->PushOpnd(phyOpnd); + MIRSymbol *callSym = GlobalTables::GetGsymTable().CreateSymbol(kScopeGlobal); + std::string funcName("MCC_DecRef_NaiveRCFast"); + callSym->SetNameStrIdx(funcName); + callSym->SetStorageClass(kScText); + callSym->SetSKind(kStFunc); + + AppendCall(*callSym, *srcOpnds); + GetCurBB()->SetHasCall(); + if (frequency != 0) { + GetCurBB()->SetFrequency(frequency); + } + return; + } + + /* refNum is 2 and one of refvar is returned, only another one is needed to call MCC_DecRef_NaiveRCFast. */ + if ((refNum == 2) && !begin && retRef != nullptr) { + AArch64SymbolAlloc *symLoc = + static_cast(memLayout->GetSymAllocInfo(retRef->GetStIndex())); + int32 stOffset = symLoc->GetOffset() / kOffsetAlign; + RegOperand &phyOpnd = GetOrCreatePhysicalRegisterOperand(R0, k64BitSize, GetRegTyFromPrimTy(PTY_a64)); + Operand *stackLoc = nullptr; + if (stOffset == 0) { + /* just have to Dec the next one. */ + stackLoc = &CreateStkTopOpnd(memLayout->GetRefLocBaseLoc() + kIntregBytelen, kSizeOfPtr * kBitsPerByte); + } else { + /* just have to Dec the current one. 
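+       * i.e. the ref in the first slot of the ref-local area; the returned ref sits in the
+       * other slot and must stay live, so only this one is passed to the decref helper.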
*/ + stackLoc = &CreateStkTopOpnd(memLayout->GetRefLocBaseLoc(), kSizeOfPtr * kBitsPerByte); + } + Insn &ldrInsn = GetCG()->BuildInstruction(PickLdInsn(k64BitSize, PTY_a64), phyOpnd, *stackLoc); + GetCurBB()->AppendInsn(ldrInsn); + + AArch64ListOperand *srcOpnds = memPool->New(*GetFuncScopeAllocator()); + srcOpnds->PushOpnd(phyOpnd); + MIRSymbol *callSym = GlobalTables::GetGsymTable().CreateSymbol(kScopeGlobal); + std::string funcName("MCC_DecRef_NaiveRCFast"); + callSym->SetNameStrIdx(funcName); + callSym->SetStorageClass(kScText); + callSym->SetSKind(kStFunc); + Insn &callInsn = AppendCall(*callSym, *srcOpnds, true); + static_cast(callInsn).SetRefSkipIndex(stOffset); + GetCurBB()->SetHasCall(); + if (frequency != 0) { + GetCurBB()->SetFrequency(frequency); + } + return; + } + + bool needSkip = false; + AArch64ListOperand *srcOpnds = memPool->New(*GetFuncScopeAllocator()); + + AArch64ImmOperand *beginOpnd = + &CreateImmOperand(memLayout->GetRefLocBaseLoc() + kIntregBytelen * formalRef, k64BitSize, true); + AArch64ImmOperand *countOpnd = &CreateImmOperand(refNum, k64BitSize, true); + int32 refSkipIndex = -1; + if (!begin && retRef != nullptr) { + AArch64SymbolAlloc *symLoc = + static_cast(memLayout->GetSymAllocInfo(retRef->GetStIndex())); + int32 stOffset = symLoc->GetOffset() / kOffsetAlign; + refSkipIndex = stOffset; + if (stOffset == 0) { + /* ret_ref at begin. */ + beginOpnd = &CreateImmOperand(memLayout->GetRefLocBaseLoc() + kIntregBytelen, k64BitSize, true); + countOpnd = &CreateImmOperand(refNum - 1, k64BitSize, true); + } else if (stOffset == (refNum - 1)) { + /* ret_ref at end. */ + countOpnd = &CreateImmOperand(refNum - 1, k64BitSize, true); + } else { + needSkip = true; + } + } + + regno_t vRegNO0 = NewVReg(GetRegTyFromPrimTy(PTY_a64), GetPrimTypeSize(PTY_a64)); + RegOperand &vReg0 = CreateVirtualRegisterOperand(vRegNO0); + RegOperand &fpOpnd = GetOrCreateStackBaseRegOperand(); + SelectAdd(vReg0, fpOpnd, *beginOpnd, PTY_i64); + + AArch64RegOperand &parmRegOpnd1 = GetOrCreatePhysicalRegisterOperand(R0, k64BitSize, GetRegTyFromPrimTy(PTY_a64)); + srcOpnds->PushOpnd(parmRegOpnd1); + SelectCopy(parmRegOpnd1, PTY_a64, vReg0, PTY_a64); + + regno_t vRegNO1 = NewVReg(GetRegTyFromPrimTy(PTY_a64), GetPrimTypeSize(PTY_a64)); + RegOperand &vReg1 = CreateVirtualRegisterOperand(vRegNO1); + SelectCopyImm(vReg1, *countOpnd, PTY_i64); + + AArch64RegOperand &parmRegOpnd2 = GetOrCreatePhysicalRegisterOperand(R1, k64BitSize, GetRegTyFromPrimTy(PTY_a64)); + srcOpnds->PushOpnd(parmRegOpnd2); + SelectCopy(parmRegOpnd2, PTY_a64, vReg1, PTY_a64); + + MIRSymbol *sym = GlobalTables::GetGsymTable().CreateSymbol(kScopeGlobal); + if (begin) { + std::string funcName("MCC_InitializeLocalStackRef"); + sym->SetNameStrIdx(funcName); + CHECK_FATAL(countOpnd->GetValue() > 0, "refCount should be greater than 0."); + refCount = static_cast(countOpnd->GetValue()); + beginOffset = beginOpnd->GetValue(); + } else if (!needSkip) { + std::string funcName("MCC_CleanupLocalStackRef_NaiveRCFast"); + sym->SetNameStrIdx(funcName); + } else { + CHECK_NULL_FATAL(retRef); + if (retRef->GetStIndex() >= memLayout->GetSymAllocTable().size()) { + CHECK_FATAL(false, "index out of range in AArch64CGFunc::HandleRCCall"); + } + AArch64SymbolAlloc *symLoc = static_cast(memLayout->GetSymAllocInfo(retRef->GetStIndex())); + int32 stOffset = symLoc->GetOffset() / kOffsetAlign; + AArch64ImmOperand &retLoc = CreateImmOperand(stOffset, k64BitSize, true); + + regno_t vRegNO2 = NewVReg(GetRegTyFromPrimTy(PTY_a64), GetPrimTypeSize(PTY_a64)); + 
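+    /* R2 carries the slot index of the returned refvar (stOffset) so that the runtime
+     * helper MCC_CleanupLocalStackRefSkip_NaiveRCFast can leave that slot untouched. */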
RegOperand &vReg2 = CreateVirtualRegisterOperand(vRegNO2); + SelectCopyImm(vReg2, retLoc, PTY_i64); + + AArch64RegOperand &parmRegOpnd3 = GetOrCreatePhysicalRegisterOperand(R2, k64BitSize, GetRegTyFromPrimTy(PTY_a64)); + srcOpnds->PushOpnd(parmRegOpnd3); + SelectCopy(parmRegOpnd3, PTY_a64, vReg2, PTY_a64); + + std::string funcName("MCC_CleanupLocalStackRefSkip_NaiveRCFast"); + sym->SetNameStrIdx(funcName); + } + sym->SetStorageClass(kScText); + sym->SetSKind(kStFunc); + + Insn &callInsn = AppendCall(*sym, *srcOpnds, true); + static_cast(callInsn).SetRefSkipIndex(refSkipIndex); + if (frequency != 0) { + GetCurBB()->SetFrequency(frequency); + } + GetCurBB()->SetHasCall(); + if (begin) { + /* Insert Yield Point just after localrefvar are initialized. */ + GenerateYieldpoint(*GetCurBB()); + yieldPointInsn = GetCurBB()->GetLastInsn(); + } +} + +void AArch64CGFunc::SelectParmListDreadSmallAggregate(MIRSymbol &sym, MIRType &structType, AArch64ListOperand &srcOpnds, + ParmLocator &parmLocator) { + /* + * in two param regs if possible + * If struct is <= 8 bytes, then it fits into one param reg. + * If struct is <= 16 bytes, then it fits into two param regs. + * Otherwise, it goes onto the stack. + * If the number of available param reg is less than what is + * needed to fit the entire struct into them, then the param + * reg is skipped and the struct goes onto the stack. + * Example 1. + * struct size == 8 bytes. + * param regs x0 to x6 are used. + * struct is passed in x7. + * Example 2. + * struct is 16 bytes. + * param regs x0 to x5 are used. + * struct is passed in x6 and x7. + * Example 3. + * struct is 16 bytes. + * param regs x0 to x6 are used. x7 alone is not enough to pass the struct. + * struct is passed on the stack. + * x7 is not used, as the following param will go onto the stack also. + */ + int32 symSize = GetBecommon().GetTypeSize(structType.GetTypeIndex().GetIdx()); + PLocInfo ploc; + parmLocator.LocateNextParm(structType, ploc); + if (ploc.reg0 == 0) { + /* No param regs available, pass on stack. */ + /* If symSize is <= 8 bytes then use 1 reg, else 2 */ + CreateCallStructParamPassByStack(symSize, &sym, nullptr, ploc.memOffset); + } else { + /* pass by param regs. */ + MemOperand &mopnd0 = GetOrCreateMemOpnd(sym, 0, k64BitSize); + CreateCallStructParamPassByReg(ploc.reg0, mopnd0, srcOpnds); + if (ploc.reg1) { + MemOperand &mopnd1 = GetOrCreateMemOpnd(sym, kSizeOfPtr, k64BitSize); + CreateCallStructParamPassByReg(ploc.reg1, mopnd1, srcOpnds); + } + } +} + +void AArch64CGFunc::SelectParmListIreadSmallAggregate(const IreadNode &iread, MIRType &structType, + AArch64ListOperand &srcOpnds, ParmLocator &parmLocator) { + int32 symSize = GetBecommon().GetTypeSize(structType.GetTypeIndex().GetIdx()); + RegOperand *addrOpnd0 = static_cast(HandleExpr(iread, *(iread.Opnd(0)))); + RegOperand *addrOpnd1 = &LoadIntoRegister(*addrOpnd0, iread.Opnd(0)->GetPrimType()); + PLocInfo ploc; + parmLocator.LocateNextParm(structType, ploc); + if (ploc.reg0 == 0) { + /* No param regs available, pass on stack. */ + CreateCallStructParamPassByStack(symSize, nullptr, addrOpnd1, ploc.memOffset); + } else { + /* pass by param regs. 
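+     * The small aggregate is loaded in 8-byte halves from [addr] and [addr, #8] into the
+     * registers chosen by the parameter locator, e.g. (illustrative registers only):
+     *   ldr x0, [x2]
+     *   ldr x1, [x2, #8]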
*/
+    AArch64OfstOperand *offOpnd0 = &GetOrCreateOfstOpnd(0, k32BitSize);
+    MemOperand *mopnd =
+        &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, k64BitSize, addrOpnd1, nullptr, offOpnd0, nullptr);
+    CreateCallStructParamPassByReg(ploc.reg0, *mopnd, srcOpnds);
+    if (ploc.reg1) {
+      AArch64OfstOperand *offOpnd1 = &GetOrCreateOfstOpnd(kSizeOfPtr, k32BitSize);
+      mopnd = &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, k64BitSize, addrOpnd1, nullptr, offOpnd1, nullptr);
+      CreateCallStructParamPassByReg(ploc.reg1, *mopnd, srcOpnds);
+    }
+  }
+}
+
+void AArch64CGFunc::SelectParmListDreadLargeAggregate(MIRSymbol &sym, MIRType &structType, AArch64ListOperand &srcOpnds,
+                                                      ParmLocator &parmLocator, int32 &structCopyOffset) {
+  /*
+   * Pass larger sized struct on stack.
+   * Need to copy the entire structure onto the stack.
+   * The pointer to the starting address of the copied struct is then
+   * used as the parameter for the struct.
+   * This pointer is passed as the next parameter.
+   * Example 1:
+   *   struct is 23 bytes.
+   *   param regs x0 to x5 are used.
+   *   First round up 23 to 24, so 3 of 8-byte slots.
+   *   Copy struct to a created space on the stack.
+   *   Pointer of copied struct is passed in x6.
+   * Example 2:
+   *   struct is 25 bytes.
+   *   param regs x0 to x7 are used.
+   *   First round up 25 to 32, so 4 of 8-byte slots.
+   *   Copy struct to a created space on the stack.
+   *   Pointer of copied struct is passed on stack as the 9th parameter.
+   */
+  int32 symSize = GetBecommon().GetTypeSize(structType.GetTypeIndex().GetIdx());
+  PLocInfo ploc;
+  parmLocator.LocateNextParm(structType, ploc);
+  uint32 numMemOp = static_cast(RoundUp(symSize, kSizeOfPtr) / kSizeOfPtr); /* round up */
+  /* Create the struct copies. */
+  AArch64RegOperand *parmOpnd = CreateCallStructParamCopyToStack(numMemOp, &sym, nullptr, structCopyOffset, ploc.reg0);
+  srcOpnds.PushOpnd(*parmOpnd);
+  structCopyOffset += (numMemOp * kSizeOfPtr);
+}
+
+void AArch64CGFunc::SelectParmListIreadLargeAggregate(const IreadNode &iread, MIRType &structType,
+                                                      AArch64ListOperand &srcOpnds, ParmLocator &parmLocator,
+                                                      int32 &structCopyOffset) {
+  int32 symSize = GetBecommon().GetTypeSize(structType.GetTypeIndex().GetIdx());
+  RegOperand *addrOpnd0 = static_cast(HandleExpr(iread, *(iread.Opnd(0))));
+  RegOperand *addrOpnd1 = &LoadIntoRegister(*addrOpnd0, iread.Opnd(0)->GetPrimType());
+  PLocInfo ploc;
+  parmLocator.LocateNextParm(structType, ploc);
+  uint32 numMemOp = static_cast(RoundUp(symSize, kSizeOfPtr) / kSizeOfPtr); /* round up */
+  AArch64RegOperand *parmOpnd =
+      CreateCallStructParamCopyToStack(numMemOp, nullptr, addrOpnd1, structCopyOffset, ploc.reg0);
+  structCopyOffset += (numMemOp * kSizeOfPtr);
+  srcOpnds.PushOpnd(*parmOpnd);
+}
+
+void AArch64CGFunc::CreateCallStructParamPassByStack(int32 symSize, MIRSymbol *sym,
+                                                     RegOperand *addrOpnd, int32 baseOffset) {
+  MemOperand *ldMopnd = nullptr;
+  MemOperand *stMopnd = nullptr;
+  int numRegNeeded = (symSize <= k8ByteSize) ?
kOneRegister : kTwoRegister; + for (int j = 0; j < numRegNeeded; j++) { + if (sym) { + ldMopnd = &GetOrCreateMemOpnd(*sym, (j * static_cast(kSizeOfPtr)), k64BitSize); + } else { + ldMopnd = &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, k64BitSize, addrOpnd, nullptr, + &GetOrCreateOfstOpnd(static_cast(j) * kSizeOfPtr, k32BitSize), nullptr); + } + RegOperand *vreg = &CreateVirtualRegisterOperand(NewVReg(kRegTyInt, k8ByteSize)); + GetCurBB()->AppendInsn(cg->BuildInstruction(PickLdInsn(k64BitSize, PTY_i64), *vreg, *ldMopnd)); + stMopnd = &CreateMemOpnd(RSP, (baseOffset + (j * kSizeOfPtr)), k64BitSize); + GetCurBB()->AppendInsn(cg->BuildInstruction(PickStInsn(k64BitSize, PTY_i64), *vreg, *stMopnd)); + } +} + +void AArch64CGFunc::CreateCallStructParamPassByReg(AArch64reg reg, MemOperand &memOpnd, AArch64ListOperand &srcOpnds) { + AArch64RegOperand &parmOpnd = GetOrCreatePhysicalRegisterOperand(reg, k64BitSize, kRegTyInt); + GetCurBB()->AppendInsn(cg->BuildInstruction(PickLdInsn(k64BitSize, PTY_i64), parmOpnd, memOpnd)); + srcOpnds.PushOpnd(parmOpnd); +} + +void AArch64CGFunc::CreateCallStructParamMemcpy(const MIRSymbol *sym, RegOperand *addropnd, + uint32 structSize, int32 copyOffset, int32 fromOffset) { + std::vector opndVec; + + RegOperand *vreg1 = &CreateVirtualRegisterOperand(NewVReg(kRegTyInt, k8BitSize)); + opndVec.push_back(vreg1); /* result */ + + RegOperand *parmOpnd = &CreateVirtualRegisterOperand(NewVReg(kRegTyInt, k8BitSize)); + RegOperand *spReg = &GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + AArch64ImmOperand *offsetOpnd0 = &CreateImmOperand(copyOffset, k64BitSize, false); + SelectAdd(*parmOpnd, *spReg, *offsetOpnd0, PTY_a64); + opndVec.push_back(parmOpnd); /* param 0 */ + + if (sym != nullptr) { + if (sym->GetStorageClass() == kScGlobal || sym->GetStorageClass() == kScExtern) { + StImmOperand &stopnd = CreateStImmOperand(*sym, 0, 0); + AArch64RegOperand &staddropnd = static_cast(CreateRegisterOperandOfType(PTY_u64)); + SelectAddrof(staddropnd, stopnd); + opndVec.push_back(&staddropnd); /* param 1 */ + } else if (sym->GetStorageClass() == kScAuto || sym->GetStorageClass() == kScFormal) { + RegOperand *parm1Reg = &CreateVirtualRegisterOperand(NewVReg(kRegTyInt, k8ByteSize)); + AArch64SymbolAlloc *symloc = static_cast(GetMemlayout()->GetSymAllocInfo(sym->GetStIndex())); + AArch64RegOperand *baseOpnd = static_cast(GetBaseReg(*symloc)); + int32 stoffset = GetBaseOffset(*symloc); + AArch64ImmOperand *offsetOpnd1 = &CreateImmOperand(stoffset, k64BitSize, false); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xaddrri12, *parm1Reg, *baseOpnd, *offsetOpnd1)); + if (sym->GetStorageClass() == kScFormal) { + MemOperand *ldmopnd = + &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, k64BitSize, parm1Reg, nullptr, + &GetOrCreateOfstOpnd(0, k32BitSize), static_cast(nullptr)); + RegOperand *tmpreg = &CreateVirtualRegisterOperand(NewVReg(kRegTyInt, k8ByteSize)); + RegOperand *vreg2 = &CreateVirtualRegisterOperand(NewVReg(kRegTyInt, k8ByteSize)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(PickLdInsn(k64BitSize, PTY_a64), + *tmpreg, *ldmopnd)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xaddrri12, *vreg2, *tmpreg, + CreateImmOperand(fromOffset, k64BitSize, false))); + parm1Reg = vreg2; + } + opndVec.push_back(parm1Reg); /* param 1 */ + } else if (sym->GetStorageClass() == kScPstatic || sym->GetStorageClass() == kScFstatic) { + CHECK_FATAL(sym->GetSKind() != kStConst, "Unsupported sym const for struct param"); + StImmOperand *stopnd = 
&CreateStImmOperand(*sym, 0, 0); + AArch64RegOperand &staddropnd = static_cast(CreateRegisterOperandOfType(PTY_u64)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xadrp, staddropnd, *stopnd)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xadrpl12, staddropnd, staddropnd, *stopnd)); + opndVec.push_back(&staddropnd); /* param 1 */ + } else { + CHECK_FATAL(0, "Unsupported sym for struct param"); + } + } else { + opndVec.push_back(addropnd); /* param 1 */ + } + + RegOperand &vreg3 = CreateVirtualRegisterOperand(NewVReg(kRegTyInt, k8BitSize)); + AArch64ImmOperand &sizeOpnd = CreateImmOperand(structSize, k64BitSize, false); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xmovri32, vreg3, sizeOpnd)); + opndVec.push_back(&vreg3); /* param 2 */ + + SelectLibCall("memcpy", opndVec, PTY_a64, PTY_a64); +} + +AArch64RegOperand *AArch64CGFunc::CreateCallStructParamCopyToStack(uint32 numMemOp, MIRSymbol *sym, RegOperand *addrOpd, + int32 copyOffset, AArch64reg reg) { + /* Create the struct copies. */ + MemOperand *ldMopnd = nullptr; + MemOperand *stMopnd = nullptr; + for (int j = 0; j < numMemOp; j++) { + if (sym != nullptr) { + ldMopnd = &GetOrCreateMemOpnd(*sym, (j * static_cast(kSizeOfPtr)), k64BitSize); + } else { + ldMopnd = &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, k64BitSize, addrOpd, nullptr, + &GetOrCreateOfstOpnd(static_cast(j) * kSizeOfPtr, k32BitSize), nullptr); + } + RegOperand *vreg = &CreateVirtualRegisterOperand(NewVReg(kRegTyInt, k8ByteSize)); + GetCurBB()->AppendInsn(cg->BuildInstruction(PickLdInsn(k64BitSize, PTY_i64), *vreg, *ldMopnd)); + + stMopnd = &CreateMemOpnd(RSP, (copyOffset + (j * kSizeOfPtr)), k64BitSize); + GetCurBB()->AppendInsn(cg->BuildInstruction(PickStInsn(k64BitSize, PTY_i64), *vreg, *stMopnd)); + } + /* Create the copy address parameter for the struct */ + AArch64RegOperand *parmOpnd = &GetOrCreatePhysicalRegisterOperand(reg, k64BitSize, kRegTyInt); + AArch64ImmOperand *offset = &CreateImmOperand(copyOffset, k64BitSize, false); + RegOperand *fpopnd = &GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + SelectAdd(*parmOpnd, *fpopnd, *offset, PTY_a64); + + return parmOpnd; +} + +void AArch64CGFunc::CreateCallStructMemcpyToParamReg(MIRType &structType, int32 structCopyOffset, + ParmLocator &parmLocator, AArch64ListOperand &srcOpnds) { + RegOperand &spReg = GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + AArch64ImmOperand &offsetOpnd = CreateImmOperand(structCopyOffset, k64BitSize, false); + + PLocInfo ploc; + parmLocator.LocateNextParm(structType, ploc); + if (ploc.reg0 != 0) { + RegOperand &res = GetOrCreatePhysicalRegisterOperand(ploc.reg0, k64BitSize, kRegTyInt); + SelectAdd(res, spReg, offsetOpnd, PTY_a64); + srcOpnds.PushOpnd(res); + } else { + RegOperand &parmOpnd = CreateVirtualRegisterOperand(NewVReg(kRegTyInt, k8ByteSize)); + SelectAdd(parmOpnd, spReg, offsetOpnd, PTY_a64); + MemOperand &stmopnd = CreateMemOpnd(RSP, ploc.memOffset, k64BitSize); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(PickStInsn(k64BitSize, PTY_i64), parmOpnd, stmopnd)); + } +} + +void AArch64CGFunc::SelectParmListForAggregate(BaseNode &argExpr, AArch64ListOperand &srcOpnds, + ParmLocator &parmLocator, int32 &structCopyOffset) { + uint64 symSize; + int32 rhsOffset = 0; + if (argExpr.GetOpCode() == OP_dread) { + DreadNode &dread = static_cast(argExpr); + MIRSymbol *sym = GetBecommon().GetMIRModule().CurFunction()->GetLocalOrGlobalSymbol(dread.GetStIdx()); + MIRType *ty = sym->GetType(); + if (dread.GetFieldID() != 0) 
{ + MIRStructType *structty = static_cast(ty); + ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(structty->GetFieldTyIdx(dread.GetFieldID())); + rhsOffset = GetBecommon().GetFieldOffset(*structty, dread.GetFieldID()).first; + } + symSize = GetBecommon().GetTypeSize(ty->GetTypeIndex().GetIdx()); + if (symSize <= k16ByteSize) { + SelectParmListDreadSmallAggregate(*sym, *ty, srcOpnds, parmLocator); + } else if (symSize > kParmMemcpySize) { + CreateCallStructParamMemcpy(sym, nullptr, symSize, structCopyOffset, rhsOffset); + CreateCallStructMemcpyToParamReg(*ty, structCopyOffset, parmLocator, srcOpnds); + structCopyOffset += RoundUp(symSize, kSizeOfPtr); + } else { + SelectParmListDreadLargeAggregate(*sym, *ty, srcOpnds, parmLocator, structCopyOffset); + } + } else if (argExpr.GetOpCode() == OP_iread) { + IreadNode &iread = static_cast(argExpr); + MIRPtrType *pointerty = static_cast(GlobalTables::GetTypeTable().GetTypeFromTyIdx(iread.GetTyIdx())); + MIRType *ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(pointerty->GetPointedTyIdx()); + if (iread.GetFieldID() != 0) { + MIRStructType *structty = static_cast(ty); + ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(structty->GetFieldTyIdx(iread.GetFieldID())); + rhsOffset = GetBecommon().GetFieldOffset(*structty, iread.GetFieldID()).first; + } + symSize = GetBecommon().GetTypeSize(ty->GetTypeIndex().GetIdx()); + if (symSize <= k16ByteSize) { + SelectParmListIreadSmallAggregate(iread, *ty, srcOpnds, parmLocator); + } else if (symSize > kParmMemcpySize) { + RegOperand *ireadOpnd = static_cast(HandleExpr(iread, *(iread.Opnd(0)))); + RegOperand *addrOpnd = &LoadIntoRegister(*ireadOpnd, iread.Opnd(0)->GetPrimType()); + if (rhsOffset > 0) { + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xaddrri12, *addrOpnd, *addrOpnd, + CreateImmOperand(rhsOffset, k64BitSize, false))); + } + + CreateCallStructParamMemcpy(nullptr, addrOpnd, symSize, structCopyOffset, rhsOffset); + CreateCallStructMemcpyToParamReg(*ty, structCopyOffset, parmLocator, srcOpnds); + structCopyOffset += RoundUp(symSize, kSizeOfPtr); + } else { + SelectParmListIreadLargeAggregate(iread, *ty, srcOpnds, parmLocator, structCopyOffset); + } + } else { + CHECK_FATAL(0, "NYI"); + } +} + +uint32 AArch64CGFunc::SelectParmListGetStructReturnSize(StmtNode &naryNode) { + if (naryNode.GetOpCode() == OP_call) { + CallNode &callNode = static_cast(naryNode); + MIRFunction *callFunc = GlobalTables::GetFunctionTable().GetFunctionFromPuidx(callNode.GetPUIdx()); + TyIdx retIdx = callFunc->GetReturnTyIdx(); + if ((GetBecommon().GetTypeSize(retIdx.GetIdx()) == 0) && GetBecommon().HasFuncReturnType(*callFunc)) { + return GetBecommon().GetTypeSize(GetBecommon().GetFuncReturnType(*callFunc)); + } + } else if (naryNode.GetOpCode() == OP_icall) { + IcallNode &icallNode = static_cast(naryNode); + CallReturnVector *p2nrets = &icallNode.GetReturnVec(); + if (p2nrets->size() == k1ByteSize) { + StIdx stIdx = (*p2nrets)[0].first; + MIRSymbol *sym = GetBecommon().GetMIRModule().CurFunction()->GetSymTab()->GetSymbolFromStIdx(stIdx.Idx()); + if (sym != nullptr) { + return GetBecommon().GetTypeSize(sym->GetTyIdx().GetIdx()); + } + } + } + return 0; +} + +/* + SelectParmList generates an instrunction for each of the parameters + to load the parameter value into the corresponding register. + We return a list of registers to the call instruction because + they may be needed in the register allocation phase. 
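+  For OP_icall and the native-call wrappers the first operand is the call target itself,
+  so argument scanning starts at operand 1. When the callee returns a struct larger than
+  16 bytes, the first parameter below is routed to R8, the indirect-result register of
+  the underlying AArch64 procedure-call standard.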
+ */
+void AArch64CGFunc::SelectParmList(StmtNode &naryNode, AArch64ListOperand &srcOpnds, bool isCallNative) {
+  ParmLocator parmLocator(GetBecommon());
+  PLocInfo ploc;
+  size_t i = 0;
+  if ((naryNode.GetOpCode() == OP_icall) || isCallNative) {
+    i++;
+  }
+
+  int32 structCopyOffset = GetMaxParamStackSize() - GetStructCopySize();
+  for (uint32 pnum = 0; i < naryNode.NumOpnds(); ++i, ++pnum) {
+    MIRType *ty = nullptr;
+    BaseNode *argExpr = naryNode.Opnd(i);
+    PrimType primType = argExpr->GetPrimType();
+    ASSERT(primType != PTY_void, "primType should not be void");
+    /* use alloca */
+    if (primType == PTY_agg) {
+      SelectParmListForAggregate(*argExpr, srcOpnds, parmLocator, structCopyOffset);
+      continue;
+    }
+    ty = GlobalTables::GetTypeTable().GetTypeTable()[static_cast(primType)];
+    RegOperand *expRegOpnd = nullptr;
+    Operand *opnd = HandleExpr(naryNode, *argExpr);
+    if (!opnd->IsRegister()) {
+      opnd = &LoadIntoRegister(*opnd, primType);
+    }
+    expRegOpnd = static_cast(opnd);
+
+    if ((pnum == 0) && (SelectParmListGetStructReturnSize(naryNode) > k16ByteSize)) {
+      parmLocator.InitPLocInfo(ploc);
+      ploc.reg0 = R8;
+    } else {
+      parmLocator.LocateNextParm(*ty, ploc);
+    }
+    if (ploc.reg0 != kRinvalid) { /* load to the register. */
+      CHECK_FATAL(expRegOpnd != nullptr, "null ptr check");
+      AArch64RegOperand &parmRegOpnd = GetOrCreatePhysicalRegisterOperand(ploc.reg0, expRegOpnd->GetSize(),
+                                                                          GetRegTyFromPrimTy(primType));
+      SelectCopy(parmRegOpnd, primType, *expRegOpnd, primType);
+      srcOpnds.PushOpnd(parmRegOpnd);
+    } else { /* store to the memory segment for stack-passed arguments. */
+      Operand &actMemOpnd = CreateMemOpnd(RSP, ploc.memOffset, GetPrimTypeBitSize(primType));
+      GetCurBB()->AppendInsn(
+          GetCG()->BuildInstruction(PickStInsn(GetPrimTypeBitSize(primType), primType), *expRegOpnd,
+                                    actMemOpnd));
+    }
+    ASSERT(ploc.reg1 == 0, "SelectCall NYI");
+  }
+}
+
+/*
+ * for MCC_DecRefResetPair(addrof ptr %Reg17_R5592, addrof ptr %Reg16_R6202) or
+ * MCC_ClearLocalStackRef(addrof ptr %Reg17_R5592), the parameter (addrof ptr xxx) is converted to asm as follows:
+ * add vreg, x29, #imm
+ * mov R0/R1, vreg
+ * this function is used to prepare parameters, the generated vreg is returned, and #imm is saved in offsetValue.
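+ * The #imm values collected here are later attached to the call insn through
+ * SetClearStackOffset (see SelectCall), so later phases know which stack slots are cleared.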
+ */ +Operand *AArch64CGFunc::SelectClearStackCallParam(const AddrofNode &expr, int64 &offsetValue) { + MIRSymbol *symbol = GetMirModule().CurFunction()->GetLocalOrGlobalSymbol(expr.GetStIdx()); + PrimType ptype = expr.GetPrimType(); + regno_t vRegNO = NewVReg(kRegTyInt, GetPrimTypeSize(ptype)); + Operand &result = CreateVirtualRegisterOperand(vRegNO); + CHECK_FATAL(expr.GetFieldID() == 0, "the fieldID of parameter in clear stack reference call must be 0"); + if (!GetCG()->IsQuiet()) { + maple::LogInfo::MapleLogger(kLlErr) << + "Warning: we expect AddrOf with StImmOperand is not used for local variables"; + } + auto *symLoc = static_cast(GetMemlayout()->GetSymAllocInfo(symbol->GetStIndex())); + AArch64ImmOperand *offset = nullptr; + if (symLoc->GetMemSegment()->GetMemSegmentKind() == kMsArgsStkPassed) { + offset = &CreateImmOperand(GetBaseOffset(*symLoc), k64BitSize, false, kUnAdjustVary); + } else if (symLoc->GetMemSegment()->GetMemSegmentKind() == kMsRefLocals) { + auto it = immOpndsRequiringOffsetAdjustmentForRefloc.find(symLoc); + if (it != immOpndsRequiringOffsetAdjustmentForRefloc.end()) { + offset = (*it).second; + } else { + offset = &CreateImmOperand(GetBaseOffset(*symLoc), k64BitSize, false); + immOpndsRequiringOffsetAdjustmentForRefloc[symLoc] = offset; + } + } else { + CHECK_FATAL(false, "the symLoc of parameter in clear stack reference call is unreasonable"); + } + offsetValue = offset->GetValue(); + SelectAdd(result, *GetBaseReg(*symLoc), *offset, PTY_u64); + if (GetCG()->GenerateVerboseCG()) { + /* Add a comment */ + Insn *insn = GetCurBB()->GetLastInsn(); + std::string comm = "local/formal var: "; + comm.append(symbol->GetName()); + insn->SetComment(comm); + } + return &result; +} + +/* select paramters for MCC_DecRefResetPair and MCC_ClearLocalStackRef function */ +void AArch64CGFunc::SelectClearStackCallParmList(const StmtNode &naryNode, AArch64ListOperand &srcOpnds, + std::vector &stackPostion) { + ParmLocator parmLocator(GetBecommon()); + PLocInfo ploc; + for (size_t i = 0; i < naryNode.NumOpnds(); ++i) { + MIRType *ty = nullptr; + BaseNode *argExpr = naryNode.Opnd(i); + PrimType primType = argExpr->GetPrimType(); + ASSERT(primType != PTY_void, "primType check"); + /* use alloc */ + CHECK_FATAL(primType != PTY_agg, "the type of argument is unreasonable"); + ty = GlobalTables::GetTypeTable().GetTypeTable()[static_cast(primType)]; + CHECK_FATAL(argExpr->GetOpCode() == OP_addrof, "the argument of clear stack call is unreasonable"); + auto *expr = static_cast(argExpr); + int64 offsetValue = 0; + Operand *opnd = SelectClearStackCallParam(*expr, offsetValue); + stackPostion.emplace_back(offsetValue); + auto *expRegOpnd = static_cast(opnd); + parmLocator.LocateNextParm(*ty, ploc); + CHECK_FATAL(ploc.reg0 != 0, "the parameter of ClearStackCall must be passed by register"); + CHECK_FATAL(expRegOpnd != nullptr, "null ptr check"); + AArch64RegOperand &parmRegOpnd = GetOrCreatePhysicalRegisterOperand(ploc.reg0, expRegOpnd->GetSize(), + GetRegTyFromPrimTy(primType)); + SelectCopy(parmRegOpnd, primType, *expRegOpnd, primType); + srcOpnds.PushOpnd(parmRegOpnd); + ASSERT(ploc.reg1 == 0, "SelectCall NYI"); + } +} + +/* + * intrinsify Unsafe.getAndAddInt and Unsafe.getAndAddLong + * generate an intrinsic instruction instead of a function call + * intrinsic_get_add_int w0, xt, ws, ws, x1, x2, w3, label + */ +void AArch64CGFunc::IntrinsifyGetAndAddInt(AArch64ListOperand &srcOpnds, PrimType pty) { + MapleList &opnds = srcOpnds.GetOperands(); + /* Unsafe.getAndAddInt has more than 4 
parameters */ + ASSERT(opnds.size() >= 4, "ensure the operands number"); + auto iter = opnds.begin(); + RegOperand *objOpnd = *(++iter); + RegOperand *offOpnd = *(++iter); + RegOperand *deltaOpnd = *(++iter); + auto &retVal = static_cast(GetTargetRetOperand(pty, -1)); + LabelIdx labIdx = CreateLabel(); + LabelOperand &targetOpnd = GetOrCreateLabelOperand(labIdx); + RegOperand &tempOpnd0 = CreateRegisterOperandOfType(PTY_i64); + RegOperand &tempOpnd1 = CreateRegisterOperandOfType(pty); + RegOperand &tempOpnd2 = CreateRegisterOperandOfType(PTY_i32); + MOperator mOp = (pty == PTY_i64) ? MOP_get_and_addL : MOP_get_and_addI; + std::vector intrnOpnds; + intrnOpnds.emplace_back(&retVal); + intrnOpnds.emplace_back(&tempOpnd0); + intrnOpnds.emplace_back(&tempOpnd1); + intrnOpnds.emplace_back(&tempOpnd2); + intrnOpnds.emplace_back(objOpnd); + intrnOpnds.emplace_back(offOpnd); + intrnOpnds.emplace_back(deltaOpnd); + intrnOpnds.emplace_back(&targetOpnd); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, intrnOpnds)); +} + +/* + * intrinsify Unsafe.getAndSetInt and Unsafe.getAndSetLong + * generate an intrinsic instruction instead of a function call + */ +void AArch64CGFunc::IntrinsifyGetAndSetInt(AArch64ListOperand &srcOpnds, PrimType pty) { + MapleList &opnds = srcOpnds.GetOperands(); + /* Unsafe.getAndSetInt has 4 parameters */ + ASSERT(opnds.size() == 4, "ensure the operands number"); + auto iter = opnds.begin(); + RegOperand *objOpnd = *(++iter); + RegOperand *offOpnd = *(++iter); + RegOperand *newValueOpnd = *(++iter); + auto &retVal = static_cast(GetTargetRetOperand(pty, -1)); + LabelIdx labIdx = CreateLabel(); + LabelOperand &targetOpnd = GetOrCreateLabelOperand(labIdx); + RegOperand &tempOpnd0 = CreateRegisterOperandOfType(PTY_i64); + RegOperand &tempOpnd1 = CreateRegisterOperandOfType(PTY_i32); + + MOperator mOp = (pty == PTY_i64) ? MOP_get_and_setL : MOP_get_and_setI; + std::vector intrnOpnds; + intrnOpnds.emplace_back(&retVal); + intrnOpnds.emplace_back(&tempOpnd0); + intrnOpnds.emplace_back(&tempOpnd1); + intrnOpnds.emplace_back(objOpnd); + intrnOpnds.emplace_back(offOpnd); + intrnOpnds.emplace_back(newValueOpnd); + intrnOpnds.emplace_back(&targetOpnd); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, intrnOpnds)); +} + +/* + * intrinsify Unsafe.compareAndSwapInt and Unsafe.compareAndSwapLong + * generate an intrinsic instruction instead of a function call + */ +void AArch64CGFunc::IntrinsifyCompareAndSwapInt(AArch64ListOperand &srcOpnds, PrimType pty) { + MapleList &opnds = srcOpnds.GetOperands(); + /* Unsafe.compareAndSwapInt has more than 5 parameters */ + ASSERT(opnds.size() >= 5, "ensure the operands number"); + auto iter = opnds.begin(); + RegOperand *objOpnd = *(++iter); + RegOperand *offOpnd = *(++iter); + RegOperand *expectedValueOpnd = *(++iter); + RegOperand *newValueOpnd = *(++iter); + auto &retVal = static_cast(GetTargetRetOperand(PTY_i64, -1)); + RegOperand &tempOpnd0 = CreateRegisterOperandOfType(PTY_i64); + RegOperand &tempOpnd1 = CreateRegisterOperandOfType(pty); + LabelIdx labIdx1 = CreateLabel(); + LabelOperand &label1Opnd = GetOrCreateLabelOperand(labIdx1); + LabelIdx labIdx2 = CreateLabel(); + LabelOperand &label2Opnd = GetOrCreateLabelOperand(labIdx2); + MOperator mOp = (pty == PTY_i32) ? 
MOP_compare_and_swapI : MOP_compare_and_swapL; + std::vector intrnOpnds; + intrnOpnds.emplace_back(&retVal); + intrnOpnds.emplace_back(&tempOpnd0); + intrnOpnds.emplace_back(&tempOpnd1); + intrnOpnds.emplace_back(objOpnd); + intrnOpnds.emplace_back(offOpnd); + intrnOpnds.emplace_back(expectedValueOpnd); + intrnOpnds.emplace_back(newValueOpnd); + intrnOpnds.emplace_back(&label1Opnd); + intrnOpnds.emplace_back(&label2Opnd); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, intrnOpnds)); +} + +/* + * the lowest bit of count field is used to indicate whether or not the string is compressed + * if the string is not compressed, jump to jumpLabIdx + */ +RegOperand *AArch64CGFunc::CheckStringIsCompressed(BB &bb, RegOperand &str, int32 countOffset, PrimType countPty, + LabelIdx jumpLabIdx) { + MemOperand &memOpnd = CreateMemOpnd(str, countOffset, str.GetSize()); + uint32 bitSize = GetPrimTypeBitSize(countPty); + MOperator loadOp = PickLdInsn(bitSize, countPty); + RegOperand &countOpnd = CreateRegisterOperandOfType(countPty); + bb.AppendInsn(GetCG()->BuildInstruction(loadOp, countOpnd, memOpnd)); + ImmOperand &immValueOne = CreateImmOperand(countPty, 1); + RegOperand &countLowestBitOpnd = CreateRegisterOperandOfType(countPty); + MOperator andOp = bitSize == k64BitSize ? MOP_xandrri13 : MOP_wandrri12; + bb.AppendInsn(GetCG()->BuildInstruction(andOp, countLowestBitOpnd, countOpnd, immValueOne)); + AArch64RegOperand &wzr = AArch64RegOperand::GetZeroRegister(bitSize); + MOperator cmpOp = (bitSize == k64BitSize) ? MOP_xcmprr : MOP_wcmprr; + Operand &rflag = GetOrCreateRflag(); + bb.AppendInsn(GetCG()->BuildInstruction(cmpOp, rflag, wzr, countLowestBitOpnd)); + bb.AppendInsn(GetCG()->BuildInstruction(MOP_beq, rflag, GetOrCreateLabelOperand(jumpLabIdx))); + bb.SetKind(BB::kBBIf); + return &countOpnd; +} + +/* + * count field stores the length shifted one bit to the left + * if the length is less than eight, jump to jumpLabIdx + */ +RegOperand *AArch64CGFunc::CheckStringLengthLessThanEight(BB &bb, RegOperand &countOpnd, PrimType countPty, + LabelIdx jumpLabIdx) { + RegOperand &lengthOpnd = CreateRegisterOperandOfType(countPty); + uint32 bitSize = GetPrimTypeBitSize(countPty); + MOperator lsrOp = (bitSize == k64BitSize) ? MOP_xlsrrri6 : MOP_wlsrrri5; + ImmOperand &immValueOne = CreateImmOperand(countPty, 1); + bb.AppendInsn(GetCG()->BuildInstruction(lsrOp, lengthOpnd, countOpnd, immValueOne)); + constexpr int kConstIntEight = 8; + ImmOperand &immValueEight = CreateImmOperand(countPty, kConstIntEight); + MOperator cmpImmOp = (bitSize == k64BitSize) ? MOP_xcmpri : MOP_wcmpri; + Operand &rflag = GetOrCreateRflag(); + bb.AppendInsn(GetCG()->BuildInstruction(cmpImmOp, rflag, lengthOpnd, immValueEight)); + bb.AppendInsn(GetCG()->BuildInstruction(MOP_blt, rflag, GetOrCreateLabelOperand(jumpLabIdx))); + bb.SetKind(BB::kBBIf); + return &lengthOpnd; +} + +void AArch64CGFunc::GenerateIntrnInsnForStrIndexOf(BB &bb, RegOperand &srcString, RegOperand &patternString, + RegOperand &srcCountOpnd, RegOperand &patternLengthOpnd, + PrimType countPty, LabelIdx jumpLabIdx) { + RegOperand &srcLengthOpnd = CreateRegisterOperandOfType(countPty); + ImmOperand &immValueOne = CreateImmOperand(countPty, 1); + uint32 bitSize = GetPrimTypeBitSize(countPty); + MOperator lsrOp = (bitSize == k64BitSize) ? 
MOP_xlsrrri6 : MOP_wlsrrri5; + bb.AppendInsn(GetCG()->BuildInstruction(lsrOp, srcLengthOpnd, srcCountOpnd, immValueOne)); +#ifdef USE_32BIT_REF + const int64 stringBaseObjSize = 16; /* shadow(4)+monitor(4)+count(4)+hash(4) */ +#else + const int64 stringBaseObjSize = 20; /* shadow(8)+monitor(4)+count(4)+hash(4) */ +#endif /* USE_32BIT_REF */ + PrimType pty = (srcString.GetSize() == k64BitSize) ? PTY_i64 : PTY_i32; + ImmOperand &immStringBaseOffset = CreateImmOperand(pty, stringBaseObjSize); + MOperator addOp = (pty == PTY_i64) ? MOP_xaddrri12 : MOP_waddrri12; + RegOperand &srcStringBaseOpnd = CreateRegisterOperandOfType(pty); + bb.AppendInsn(GetCG()->BuildInstruction(addOp, srcStringBaseOpnd, srcString, immStringBaseOffset)); + RegOperand &patternStringBaseOpnd = CreateRegisterOperandOfType(pty); + bb.AppendInsn(GetCG()->BuildInstruction(addOp, patternStringBaseOpnd, patternString, + immStringBaseOffset)); + auto &retVal = static_cast(GetTargetRetOperand(PTY_i32, -1)); + std::vector intrnOpnds; + intrnOpnds.emplace_back(&retVal); + intrnOpnds.emplace_back(&srcStringBaseOpnd); + intrnOpnds.emplace_back(&srcLengthOpnd); + intrnOpnds.emplace_back(&patternStringBaseOpnd); + intrnOpnds.emplace_back(&patternLengthOpnd); + const uint32 tmpRegOperandNum = 6; + for (uint32 i = 0; i < tmpRegOperandNum - 1; ++i) { + RegOperand &tmpOpnd = CreateRegisterOperandOfType(PTY_i64); + intrnOpnds.emplace_back(&tmpOpnd); + } + intrnOpnds.emplace_back(&CreateRegisterOperandOfType(PTY_i32)); + const uint32 labelNum = 7; + for (uint32 i = 0; i < labelNum; ++i) { + LabelIdx labIdx = CreateLabel(); + LabelOperand &labelOpnd = GetOrCreateLabelOperand(labIdx); + intrnOpnds.emplace_back(&labelOpnd); + } + bb.AppendInsn(GetCG()->BuildInstruction(MOP_string_indexof, intrnOpnds)); + bb.AppendInsn(GetCG()->BuildInstruction(MOP_xuncond, GetOrCreateLabelOperand(jumpLabIdx))); + bb.SetKind(BB::kBBGoto); +} + +/* + * intrinsify String.indexOf + * generate an intrinsic instruction instead of a function call if both the source string and the specified substring + * are compressed and the length of the substring is not less than 8, i.e. 
+ * bl String.indexOf, srcString, patternString ===>> + * + * ldr srcCountOpnd, [srcString, offset] + * and srcCountLowestBitOpnd, srcCountOpnd, #1 + * cmp wzr, srcCountLowestBitOpnd + * beq Label.call + * ldr patternCountOpnd, [patternString, offset] + * and patternCountLowestBitOpnd, patternCountOpnd, #1 + * cmp wzr, patternCountLowestBitOpnd + * beq Label.call + * lsr patternLengthOpnd, patternCountOpnd, #1 + * cmp patternLengthOpnd, #8 + * blt Label.call + * lsr srcLengthOpnd, srcCountOpnd, #1 + * add srcStringBaseOpnd, srcString, immStringBaseOffset + * add patternStringBaseOpnd, patternString, immStringBaseOffset + * intrinsic_string_indexof retVal, srcStringBaseOpnd, srcLengthOpnd, patternStringBaseOpnd, patternLengthOpnd, + * tmpOpnd1, tmpOpnd2, tmpOpnd3, tmpOpnd4, tmpOpnd5, tmpOpnd6, + * label1, label2, label3, lable3, label4, label5, label6, label7 + * b Label.joint + * Label.call: + * bl String.indexOf, srcString, patternString + * Label.joint: + */ +void AArch64CGFunc::IntrinsifyStringIndexOf(AArch64ListOperand &srcOpnds, const MIRSymbol &funcSym) { + MapleList &opnds = srcOpnds.GetOperands(); + /* String.indexOf opnd size must be more than 2 */ + ASSERT(opnds.size() >= 2, "ensure the operands number"); + auto iter = opnds.begin(); + RegOperand *srcString = *iter; + RegOperand *patternString = *(++iter); + GStrIdx gStrIdx = GlobalTables::GetStrTable().GetStrIdxFromName(namemangler::kJavaLangStringStr); + MIRType *type = + GlobalTables::GetTypeTable().GetTypeFromTyIdx(GlobalTables::GetTypeNameTable().GetTyIdxFromGStrIdx(gStrIdx)); + auto stringType = static_cast(type); + CHECK_FATAL(stringType != nullptr, "Ljava_2Flang_2FString_3B type can not be null"); + FieldID fieldID = GetMirModule().GetMIRBuilder()->GetStructFieldIDFromFieldNameParentFirst(stringType, "count"); + MIRType *fieldType = stringType->GetFieldType(fieldID); + PrimType countPty = fieldType->GetPrimType(); + int32 offset = GetBecommon().GetFieldOffset(*stringType, fieldID).first; + LabelIdx callBBLabIdx = CreateLabel(); + RegOperand *srcCountOpnd = CheckStringIsCompressed(*GetCurBB(), *srcString, offset, countPty, callBBLabIdx); + + BB *srcCompressedBB = CreateNewBB(); + GetCurBB()->AppendBB(*srcCompressedBB); + RegOperand *patternCountOpnd = CheckStringIsCompressed(*srcCompressedBB, *patternString, offset, countPty, + callBBLabIdx); + + BB *patternCompressedBB = CreateNewBB(); + RegOperand *patternLengthOpnd = CheckStringLengthLessThanEight(*patternCompressedBB, *patternCountOpnd, countPty, + callBBLabIdx); + + BB *intrinsicBB = CreateNewBB(); + LabelIdx jointLabIdx = CreateLabel(); + GenerateIntrnInsnForStrIndexOf(*intrinsicBB, *srcString, *patternString, *srcCountOpnd, *patternLengthOpnd, + countPty, jointLabIdx); + + BB *callBB = CreateNewBB(); + callBB->AddLabel(callBBLabIdx); + SetLab2BBMap(callBBLabIdx, *callBB); + SetCurBB(*callBB); + Insn &callInsn = AppendCall(funcSym, srcOpnds); + MIRType *retType = funcSym.GetFunction()->GetReturnType(); + if (retType != nullptr) { + callInsn.SetRetSize(retType->GetSize()); + } + GetFunction().SetHasCall(); + + BB *jointBB = CreateNewBB(); + jointBB->AddLabel(jointLabIdx); + SetLab2BBMap(jointLabIdx, *jointBB); + srcCompressedBB->AppendBB(*patternCompressedBB); + patternCompressedBB->AppendBB(*intrinsicBB); + intrinsicBB->AppendBB(*callBB); + callBB->AppendBB(*jointBB); + SetCurBB(*jointBB); +} +void AArch64CGFunc::SelectCall(CallNode &callNode) { + MIRFunction *fn = GlobalTables::GetFunctionTable().GetFunctionFromPuidx(callNode.GetPUIdx()); + MIRSymbol *fsym = 
GetFunction().GetLocalOrGlobalSymbol(fn->GetStIdx(), false); + MIRType *retType = fn->GetReturnType(); + + if (GetCG()->GenerateVerboseCG()) { + const std::string &comment = fsym->GetName(); + GetCurBB()->AppendInsn(CreateCommentInsn(comment)); + } + + AArch64ListOperand *srcOpnds = memPool->New(*GetFuncScopeAllocator()); + bool callNative = false; + if ((fsym->GetName() == "MCC_CallFastNative") || (fsym->GetName() == "MCC_CallFastNativeExt") || + (fsym->GetName() == "MCC_CallSlowNative0") || (fsym->GetName() == "MCC_CallSlowNative1") || + (fsym->GetName() == "MCC_CallSlowNative2") || (fsym->GetName() == "MCC_CallSlowNative3") || + (fsym->GetName() == "MCC_CallSlowNative4") || (fsym->GetName() == "MCC_CallSlowNative5") || + (fsym->GetName() == "MCC_CallSlowNative6") || (fsym->GetName() == "MCC_CallSlowNative7") || + (fsym->GetName() == "MCC_CallSlowNative8") || (fsym->GetName() == "MCC_CallSlowNativeExt")) { + callNative = true; + } + + std::vector stackPosition; + if ((fsym->GetName() == "MCC_DecRefResetPair") || (fsym->GetName() == "MCC_ClearLocalStackRef")) { + SelectClearStackCallParmList(callNode, *srcOpnds, stackPosition); + } else { + SelectParmList(callNode, *srcOpnds, callNative); + } + if (callNative) { + GetCurBB()->AppendInsn(CreateCommentInsn("call native func")); + + BaseNode *funcArgExpr = callNode.Opnd(0); + PrimType ptype = funcArgExpr->GetPrimType(); + Operand *funcOpnd = HandleExpr(callNode, *funcArgExpr); + AArch64RegOperand &livein = GetOrCreatePhysicalRegisterOperand(R9, kSizeOfPtr * kBitsPerByte, + GetRegTyFromPrimTy(PTY_a64)); + SelectCopy(livein, ptype, *funcOpnd, ptype); + + AArch64RegOperand &extraOpnd = GetOrCreatePhysicalRegisterOperand(R9, kSizeOfPtr * kBitsPerByte, kRegTyInt); + srcOpnds->PushOpnd(extraOpnd); + } + const std::string &funcName = fsym->GetName(); + if (Globals::GetInstance()->GetOptimLevel() >= CGOptions::kLevel2 && + funcName == "Ljava_2Flang_2FString_3B_7CindexOf_7C_28Ljava_2Flang_2FString_3B_29I") { + GStrIdx strIdx = GlobalTables::GetStrTable().GetStrIdxFromName(funcName); + MIRSymbol *st = GlobalTables::GetGsymTable().GetSymbolFromStrIdx(strIdx, true); + IntrinsifyStringIndexOf(*srcOpnds, *st); + return; + } + Insn &callInsn = AppendCall(*fsym, *srcOpnds); + GetCurBB()->SetHasCall(); + if (retType != nullptr) { + callInsn.SetRetSize(retType->GetSize()); + callInsn.SetIsCallReturnUnsigned(IsUnsignedInteger(retType->GetPrimType())); + } + + GetFunction().SetHasCall(); + if ((fsym->GetName() == "MCC_ThrowException") || (fsym->GetName() == "MCC_RethrowException") || + (fsym->GetName() == "MCC_ThrowArithmeticException") || + (fsym->GetName() == "MCC_ThrowArrayIndexOutOfBoundsException") || + (fsym->GetName() == "MCC_ThrowNullPointerException") || + (fsym->GetName() == "MCC_ThrowStringIndexOutOfBoundsException") || (fsym->GetName() == "abort") || + (fsym->GetName() == "exit") || (fsym->GetName() == "MCC_Array_Boundary_Check")) { + callInsn.SetIsThrow(true); + GetCurBB()->SetKind(BB::kBBThrow); + } else if ((fsym->GetName() == "MCC_DecRefResetPair") || (fsym->GetName() == "MCC_ClearLocalStackRef")) { + for (size_t i = 0; i < stackPosition.size(); ++i) { + callInsn.SetClearStackOffset(i, stackPosition[i]); + } + } +} + +void AArch64CGFunc::SelectIcall(IcallNode &icallNode, Operand &srcOpnd) { + AArch64ListOperand *srcOpnds = memPool->New(*GetFuncScopeAllocator()); + SelectParmList(icallNode, *srcOpnds); + + Operand *fptrOpnd = &srcOpnd; + if (fptrOpnd->GetKind() != Operand::kOpdRegister) { + PrimType ty = icallNode.Opnd(0)->GetPrimType(); + 
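+    /* blr takes its target in a register, so materialize the function pointer first. */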
fptrOpnd = &SelectCopy(srcOpnd, ty, ty); + } + ASSERT(fptrOpnd->IsRegister(), "SelectIcall: function pointer not RegOperand"); + RegOperand *regOpnd = static_cast(fptrOpnd); + Insn &callInsn = GetCG()->BuildInstruction(MOP_xblr, *regOpnd, *srcOpnds); + + MIRType *retType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(icallNode.GetRetTyIdx()); + if (retType != nullptr) { + callInsn.SetRetSize(retType->GetSize()); + callInsn.SetIsCallReturnUnsigned(IsUnsignedInteger(retType->GetPrimType())); + } + + GetCurBB()->AppendInsn(callInsn); + GetCurBB()->SetHasCall(); + ASSERT(GetCurBB()->GetLastInsn()->IsCall(), "lastInsn should be a call"); + GetFunction().SetHasCall(); +} + +void AArch64CGFunc::HandleCatch() { + if (Globals::GetInstance()->GetOptimLevel() >= 1) { + regno_t regNO = uCatch.regNOCatch; + RegOperand &vregOpnd = GetOrCreateVirtualRegisterOperand(regNO); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xmovrr, vregOpnd, + GetOrCreatePhysicalRegisterOperand(R0, k64BitSize, kRegTyInt))); + } else { + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(PickStInsn(uCatch.opndCatch->GetSize(), PTY_a64), + GetOrCreatePhysicalRegisterOperand(R0, k64BitSize, kRegTyInt), *uCatch.opndCatch)); + } +} + +void AArch64CGFunc::SelectMembar(StmtNode &membar) { + switch (membar.GetOpCode()) { + case OP_membaracquire: + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_dmb_ishld)); + break; + case OP_membarrelease: + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_dmb_ish)); + break; + case OP_membarstoreload: + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_dmb_ish)); + break; + case OP_membarstorestore: + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_dmb_ishst)); + break; + default: + ASSERT(false, "NYI"); + break; + } +} + +void AArch64CGFunc::SelectComment(CommentNode &comment) { + GetCurBB()->AppendInsn(CreateCommentInsn(comment.GetComment())); +} + +void AArch64CGFunc::SelectReturn(Operand *opnd0) { + ReturnMechanism retMech(*(GetFunction().GetReturnType()), GetBecommon()); + if (retMech.GetRegCount() > 0) { + CHECK_FATAL(opnd0 != nullptr, "opnd0 must not be nullptr"); + if (opnd0->IsRegister()) { + RegOperand *regOpnd = static_cast(opnd0); + if (regOpnd->GetRegisterNumber() != retMech.GetReg0()) { + AArch64RegOperand &retOpnd = + GetOrCreatePhysicalRegisterOperand(retMech.GetReg0(), regOpnd->GetSize(), + GetRegTyFromPrimTy(retMech.GetPrimTypeOfReg0())); + SelectCopy(retOpnd, retMech.GetPrimTypeOfReg0(), *regOpnd, retMech.GetPrimTypeOfReg0()); + } + } else if (opnd0->IsMemoryAccessOperand()) { + AArch64MemOperand *memopnd = static_cast(opnd0); + AArch64RegOperand &retOpnd = GetOrCreatePhysicalRegisterOperand(retMech.GetReg0(), + GetPrimTypeBitSize(retMech.GetPrimTypeOfReg0()), GetRegTyFromPrimTy(retMech.GetPrimTypeOfReg0())); + MOperator mOp = PickLdInsn(memopnd->GetSize(), retMech.GetPrimTypeOfReg0()); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, retOpnd, *memopnd)); + } else if (opnd0->IsConstImmediate()) { + ImmOperand *immOpnd = static_cast(opnd0); + AArch64RegOperand &retOpnd = GetOrCreatePhysicalRegisterOperand(retMech.GetReg0(), + GetPrimTypeBitSize(retMech.GetPrimTypeOfReg0()), GetRegTyFromPrimTy(retMech.GetPrimTypeOfReg0())); + SelectCopy(retOpnd, retMech.GetPrimTypeOfReg0(), *immOpnd, retMech.GetPrimTypeOfReg0()); + } else { + CHECK_FATAL(false, "nyi"); + } + } else if (opnd0 != nullptr) { /* pass in memory */ + CHECK_FATAL(false, "SelectReturn: return in memory NYI"); + } + GetExitBBsVec().emplace_back(GetCurBB()); +} + +RegOperand 
&AArch64CGFunc::GetOrCreateSpecialRegisterOperand(PregIdx sregIdx, PrimType primType) { + AArch64reg reg = R0; + switch (sregIdx) { + case kSregSp: + reg = RSP; + break; + case kSregFp: + reg = RFP; + break; + case kSregThrownval: { /* uses x0 == R0 */ + ASSERT(uCatch.regNOCatch > 0, "regNOCatch should greater than 0."); + if (Globals::GetInstance()->GetOptimLevel() == 0) { + RegOperand ®Opnd = GetOrCreateVirtualRegisterOperand(NewVReg(kRegTyInt, k8BitSize)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction( + PickLdInsn(uCatch.opndCatch->GetSize(), PTY_a64), regOpnd, *uCatch.opndCatch)); + return regOpnd; + } else { + return GetOrCreateVirtualRegisterOperand(uCatch.regNOCatch); + } + } + case kSregRetval0: + if (!IsPrimitiveInteger(primType)) { + reg = V0; + } + break; + case kSregMethodhdl: + if (methodHandleVreg == regno_t(-1)) { + methodHandleVreg = NewVReg(kRegTyInt, k8BitSize); + } + return GetOrCreateVirtualRegisterOperand(methodHandleVreg); + default: + ASSERT(false, "Special pseudo registers NYI"); + break; + } + return GetOrCreatePhysicalRegisterOperand(reg, k64BitSize, kRegTyInt); +} + +AArch64RegOperand &AArch64CGFunc::GetOrCreatePhysicalRegisterOperand(AArch64reg regNO, uint32 size, + RegType kind, uint32 flag) { + size = (size <= k32BitSize) ? k32BitSize : k64BitSize; + + auto it = phyRegOperandTable.find(AArch64RegOperand(regNO, size, kind, flag)); + if (it != phyRegOperandTable.end()) { + return *(it->second); + } + + AArch64RegOperand *o = memPool->New(regNO, size, kind, flag); + phyRegOperandTable[*o] = o; + return *o; +} + +const LabelOperand *AArch64CGFunc::GetLabelOperand(LabelIdx labIdx) const { + const MapleUnorderedMap::const_iterator it = hashLabelOpndTable.find(labIdx); + if (it != hashLabelOpndTable.end()) { + return it->second; + } + return nullptr; +} + +LabelOperand &AArch64CGFunc::GetOrCreateLabelOperand(LabelIdx labIdx) { + MapleUnorderedMap::iterator it = hashLabelOpndTable.find(labIdx); + if (it != hashLabelOpndTable.end()) { + return *(it->second); + } + const char *funcName = GetShortFuncName().c_str(); + LabelOperand *res = memPool->New(funcName, labIdx); + hashLabelOpndTable[labIdx] = res; + return *res; +} + +LabelOperand &AArch64CGFunc::GetOrCreateLabelOperand(BB &bb) { + LabelIdx labelIdx = bb.GetLabIdx(); + if (labelIdx == MIRLabelTable::GetDummyLabel()) { + labelIdx = CreateLabel(); + bb.AddLabel(labelIdx); + } + return GetOrCreateLabelOperand(labelIdx); +} + +LabelOperand &AArch64CGFunc::CreateFuncLabelOperand(const MIRSymbol &funcSymbol) { + const char *funcName = memPool->New(funcSymbol.GetName())->c_str(); + return *memPool->New(funcName); +} + +AArch64OfstOperand &AArch64CGFunc::GetOrCreateOfstOpnd(uint32 offset, uint32 size) { + AArch64OfstOperand tOfstOpnd(offset, size); + auto it = hashOfstOpndTable.find(tOfstOpnd); + if (it != hashOfstOpndTable.end()) { + return *it->second; + } + AArch64OfstOperand *res = memPool->New(offset, size); + hashOfstOpndTable[tOfstOpnd] = res; + return *res; +} + +MemOperand &AArch64CGFunc::GetOrCreateMemOpnd(const MIRSymbol &symbol, int32 offset, uint32 size, bool forLocalRef) { + MIRStorageClass storageClass = symbol.GetStorageClass(); + if ((storageClass == kScAuto) || (storageClass == kScFormal)) { + AArch64SymbolAlloc *symLoc = + static_cast(GetMemlayout()->GetSymAllocInfo(symbol.GetStIndex())); + if (forLocalRef) { + auto p = GetMemlayout()->GetLocalRefLocMap().find(symbol.GetStIdx()); + CHECK_FATAL(p != GetMemlayout()->GetLocalRefLocMap().end(), "sym loc should have been defined"); + symLoc = 
static_cast(p->second); + } + ASSERT(symLoc != nullptr, "sym loc should have been defined"); + /* At this point, we don't know which registers the callee needs to save. */ + ASSERT((IsFPLRAddedToCalleeSavedList() || (SizeOfCalleeSaved() == 0)), + "CalleeSaved won't be known until after Register Allocation"); + StIdx idx = symbol.GetStIdx(); + auto it = memOpndsRequiringOffsetAdjustment.find(idx); + ASSERT((!IsFPLRAddedToCalleeSavedList() || + ((it != memOpndsRequiringOffsetAdjustment.end()) || (storageClass == kScFormal))), + "Memory operand of this symbol should have been added to the hash table"); + int32 stOffset = GetBaseOffset(*symLoc); + if (it != memOpndsRequiringOffsetAdjustment.end()) { + if (GetMemlayout()->IsLocalRefLoc(symbol)) { + if (!forLocalRef) { + return *(it->second); + } + } else if (mirModule.IsJavaModule()) { + return *(it->second); + } else { + Operand* offOpnd = (it->second)->GetOffset(); + if (((static_cast(offOpnd))->GetOffsetValue() == (stOffset + offset)) && + (it->second->GetSize() == size)) { + return *(it->second); + } + } + } + it = memOpndsForStkPassedArguments.find(idx); + if (it != memOpndsForStkPassedArguments.end()) { + if (GetMemlayout()->IsLocalRefLoc(symbol)) { + if (!forLocalRef) { + return *(it->second); + } + } else { + return *(it->second); + } + } + + AArch64RegOperand *baseOpnd = static_cast(GetBaseReg(*symLoc)); + int32 totalOffset = stOffset + offset; + /* needs a fresh copy of OfstOperand as we may adjust its offset at a later stage. */ + AArch64OfstOperand *offsetOpnd = memPool->New(totalOffset, k64BitSize); + if (symLoc->GetMemSegment()->GetMemSegmentKind() == kMsArgsStkPassed && + AArch64MemOperand::IsPIMMOffsetOutOfRange(totalOffset, size)) { + AArch64ImmOperand *offsetOprand; + offsetOprand = &CreateImmOperand(totalOffset, k64BitSize, true, kUnAdjustVary); + Operand *resImmOpnd = &SelectCopy(*offsetOprand, PTY_i64, PTY_i64); + return *memPool->New(AArch64MemOperand::kAddrModeBOrX, size, *baseOpnd, + static_cast(*resImmOpnd), nullptr, symbol, true); + } else { + if (symLoc->GetMemSegment()->GetMemSegmentKind() == kMsArgsStkPassed) { + offsetOpnd->SetVary(kUnAdjustVary); + } + AArch64MemOperand *res = memPool->New(AArch64MemOperand::kAddrModeBOi, size, *baseOpnd, + nullptr, offsetOpnd, &symbol); + if ((symbol.GetType()->GetKind() != kTypeClass) && !forLocalRef) { + memOpndsRequiringOffsetAdjustment[idx] = res; + } + return *res; + } + } else if ((storageClass == kScGlobal) || (storageClass == kScExtern)) { + StImmOperand &stOpnd = CreateStImmOperand(symbol, offset, 0); + AArch64RegOperand &stAddrOpnd = static_cast(CreateRegisterOperandOfType(PTY_u64)); + SelectAddrof(stAddrOpnd, stOpnd); + /* AArch64MemOperand::AddrMode_B_OI */ + return *memPool->New(AArch64MemOperand::kAddrModeBOi, size, stAddrOpnd, + nullptr, &GetOrCreateOfstOpnd(0, k32BitSize), &symbol); + } else if ((storageClass == kScPstatic) || (storageClass == kScFstatic)) { + if (symbol.GetSKind() == kStConst) { + ASSERT(offset == 0, "offset should be 0 for constant literals"); + return *memPool->New(AArch64MemOperand::kAddrModeLiteral, size, symbol); + } else { + StImmOperand &stOpnd = CreateStImmOperand(symbol, offset, 0); + AArch64RegOperand &stAddrOpnd = static_cast(CreateRegisterOperandOfType(PTY_u64)); + /* adrp x1, _PTR__cinf_Ljava_2Flang_2FSystem_3B */ + Insn &insn = GetCG()->BuildInstruction(MOP_xadrp, stAddrOpnd, stOpnd); + GetCurBB()->AppendInsn(insn); + /* ldr x1, [x1, #:lo12:_PTR__cinf_Ljava_2Flang_2FSystem_3B] */ + return 
*memPool->New(AArch64MemOperand::kAddrModeLo12Li, size, stAddrOpnd, nullptr, + &GetOrCreateOfstOpnd(offset, k32BitSize), &symbol); + } + } else { + CHECK_FATAL(false, "NYI"); + } +} + +AArch64MemOperand &AArch64CGFunc::GetOrCreateMemOpnd(AArch64MemOperand::AArch64AddressingMode mode, uint32 size, + RegOperand *base, RegOperand *index, OfstOperand *offset, + const MIRSymbol *st) { + ASSERT(base != nullptr, "nullptr check"); + AArch64MemOperand tMemOpnd(mode, size, *base, index, offset, st); + auto it = hashMemOpndTable.find(tMemOpnd); + if (it != hashMemOpndTable.end()) { + return *(it->second); + } + AArch64MemOperand *res = memPool->New(tMemOpnd); + hashMemOpndTable[tMemOpnd] = res; + return *res; +} + +AArch64MemOperand &AArch64CGFunc::GetOrCreateMemOpnd(AArch64MemOperand::AArch64AddressingMode mode, uint32 size, + RegOperand *base, RegOperand *index, int32 shift, + bool isSigned) { + ASSERT(base != nullptr, "nullptr check"); + AArch64MemOperand tMemOpnd(mode, size, *base, *index, shift, isSigned); + auto it = hashMemOpndTable.find(tMemOpnd); + if (it != hashMemOpndTable.end()) { + return *(it->second); + } + AArch64MemOperand *res = memPool->New(tMemOpnd); + hashMemOpndTable[tMemOpnd] = res; + return *res; +} + +/* offset: base offset from FP or SP */ +MemOperand &AArch64CGFunc::CreateMemOpnd(RegOperand &baseOpnd, int32 offset, uint32 size) { + AArch64OfstOperand &offsetOpnd = CreateOfstOpnd(offset, k32BitSize); + if (!ImmOperand::IsInBitSizeRot(kMaxImmVal12Bits, offset)) { + Operand *resImmOpnd = &SelectCopy(CreateImmOperand(offset, k32BitSize, true), PTY_i32, PTY_i32); + return *memPool->New(AArch64MemOperand::kAddrModeBOi, size, baseOpnd, + static_cast(resImmOpnd), nullptr, nullptr); + } else { + ASSERT(!AArch64MemOperand::IsPIMMOffsetOutOfRange(offset, size), "should not be PIMMOffsetOutOfRange"); + return *memPool->New(AArch64MemOperand::kAddrModeBOi, size, baseOpnd, + nullptr, &offsetOpnd, nullptr); + } +} + +/* offset: base offset + #:lo12:Label+immediate */ +MemOperand &AArch64CGFunc::CreateMemOpnd(RegOperand &baseOpnd, int32 offset, uint32 size, const MIRSymbol &sym) { + AArch64OfstOperand &offsetOpnd = CreateOfstOpnd(offset, k32BitSize); + ASSERT(ImmOperand::IsInBitSizeRot(kMaxImmVal12Bits, offset), ""); + return *memPool->New(AArch64MemOperand::kAddrModeBOi, size, baseOpnd, nullptr, &offsetOpnd, &sym); +} + +RegOperand &AArch64CGFunc::GenStructParamIndex(RegOperand &base, const BaseNode &indexExpr, int shift) { + RegOperand *index = &LoadIntoRegister(*HandleExpr(indexExpr, *(indexExpr.Opnd(0))), PTY_a64); + RegOperand *srcOpnd = &CreateRegisterOperandOfType(PTY_a64); + ImmOperand *imm = &CreateImmOperand(PTY_a64, shift); + SelectShift(*srcOpnd, *index, *imm, kShiftLeft, PTY_a64); + RegOperand *result = &CreateRegisterOperandOfType(PTY_a64); + SelectAdd(*result, base, *srcOpnd, PTY_a64); + + AArch64OfstOperand *offopnd = &CreateOfstOpnd(0, k32BitSize); + AArch64MemOperand &mo = + GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, k64BitSize, result, nullptr, offopnd, nullptr); + RegOperand &structAddr = CreateVirtualRegisterOperand(NewVReg(kRegTyInt, k8ByteSize)); + GetCurBB()->AppendInsn(cg->BuildInstruction(MOP_xldr, structAddr, mo)); + + return structAddr; +} + +/* iread a64 <* <* void>> 0 (add a64 ( + * addrof a64 $__reg_jni_func_tab$$libcore_all_dex, + * mul a64 ( + * cvt a64 i32 (constval i32 21), + * constval a64 8))) + */ +MemOperand *AArch64CGFunc::CheckAndCreateExtendMemOpnd(PrimType ptype, BaseNode &addrExpr, int32 offset, + AArch64isa::MemoryOrdering memOrd) { + 
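+  /* Try to fold "base + (cvt(index) * scale)" -- the shape shown in the iread example
+   * above -- into a base + extended-register memory operand; any mismatch (explicit
+   * memory ordering, non-integer type, unexpected scale) returns nullptr instead. */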
aggParamReg = nullptr; + if (memOrd != AArch64isa::kMoNone || !IsPrimitiveInteger(ptype) || addrExpr.GetOpCode() != OP_add || offset != 0) { + return nullptr; + } + BaseNode *baseExpr = addrExpr.Opnd(0); + BaseNode *addendExpr = addrExpr.Opnd(1); + if (addendExpr->GetOpCode() != OP_mul) { + return nullptr; + } + BaseNode *indexExpr, *scaleExpr; + indexExpr = addendExpr->Opnd(0); + scaleExpr = addendExpr->Opnd(1); + if (scaleExpr->GetOpCode() != OP_constval) { + return nullptr; + } + ConstvalNode *constValNode = static_cast(scaleExpr); + CHECK_FATAL(constValNode->GetConstVal()->GetKind() == kConstInt, "expect MIRIntConst"); + MIRIntConst *mirIntConst = safe_cast(constValNode->GetConstVal()); + CHECK_FATAL(mirIntConst != nullptr, "just checking"); + int32 scale = mirIntConst->GetValue(); + if (scale < 0) { + return nullptr; + } + uint32 unsignedScale = static_cast(scale); + if (unsignedScale != GetPrimTypeSize(ptype) || indexExpr->GetOpCode() != OP_cvt) { + return nullptr; + } + /* 8 is 1 << 3; 4 is 1 << 2; 2 is 1 << 1; 1 is 1 << 0 */ + int32 shift = (unsignedScale == 8) ? 3 : ((unsignedScale == 4) ? 2 : ((unsignedScale == 2) ? 1 : 0)); + RegOperand &base = static_cast(LoadIntoRegister(*HandleExpr(addrExpr, *baseExpr), PTY_a64)); + TypeCvtNode *typeCvtNode = static_cast(indexExpr); + PrimType fromType = typeCvtNode->FromType(); + PrimType toType = typeCvtNode->GetPrimType(); + if (isAggParamInReg) { + aggParamReg = &GenStructParamIndex(base, *indexExpr, shift); + return nullptr; + } + MemOperand *memOpnd = nullptr; + if ((fromType == PTY_i32) && (toType == PTY_a64)) { + RegOperand &index = + static_cast(LoadIntoRegister(*HandleExpr(*indexExpr, *indexExpr->Opnd(0)), PTY_i32)); + memOpnd = &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, GetPrimTypeBitSize(ptype), &base, &index, + shift, true); + } else if ((fromType == PTY_u32) && (toType == PTY_a64)) { + RegOperand &index = + static_cast(LoadIntoRegister(*HandleExpr(*indexExpr, *indexExpr->Opnd(0)), PTY_u32)); + memOpnd = &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, GetPrimTypeBitSize(ptype), &base, &index, + shift, false); + } + return memOpnd; +} + +/* + * Create a memory operand with specified data type and memory ordering, making + * use of aarch64 extend register addressing mode when possible. 
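 *
 * Illustrative sketch (not part of the generated code; the helper name is
 * invented): the scaled index above is only folded into a register-offset
 * operand when the constant scale equals the access size and is a power of
 * two; the shift then encodes log2(scale), mirroring the unsignedScale checks.
 *
 *   #include <cstdint>
 *   int ScaleToShift(uint32_t scale) {
 *     switch (scale) {
 *       case 8: return 3;
 *       case 4: return 2;
 *       case 2: return 1;
 *       case 1: return 0;
 *       default: return -1;  // not encodable as a shifted register offset
 *     }
 *   }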
+ */ +MemOperand &AArch64CGFunc::CreateMemOpnd(PrimType ptype, const BaseNode &parent, BaseNode &addrExpr, int32 offset, + AArch64isa::MemoryOrdering memOrd) { + MemOperand *memOpnd = CheckAndCreateExtendMemOpnd(ptype, addrExpr, offset, memOrd); + if (memOpnd != nullptr) { + return *memOpnd; + } + Operand *addrOpnd = HandleExpr(parent, addrExpr); + addrOpnd = static_cast(&LoadIntoRegister(*addrOpnd, PTY_a64)); + if ((addrExpr.GetOpCode() == OP_CG_array_elem_add) && (offset == 0) && GetCurBB() && GetCurBB()->GetLastInsn() && + (GetCurBB()->GetLastInsn()->GetMachineOpcode() == MOP_xadrpl12)) { + Operand &opnd = GetCurBB()->GetLastInsn()->GetOperand(kInsnThirdOpnd); + StImmOperand &stOpnd = static_cast(opnd); + + AArch64OfstOperand &ofstOpnd = GetOrCreateOfstOpnd(stOpnd.GetOffset(), k32BitSize); + MemOperand &tmpMemOpnd = GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeLo12Li, GetPrimTypeBitSize(ptype), + static_cast(addrOpnd), nullptr, &ofstOpnd, stOpnd.GetSymbol()); + GetCurBB()->RemoveInsn(*GetCurBB()->GetLastInsn()); + return tmpMemOpnd; + } else { + AArch64OfstOperand &ofstOpnd = GetOrCreateOfstOpnd(offset, k32BitSize); + return GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, GetPrimTypeBitSize(ptype), + static_cast(addrOpnd), nullptr, &ofstOpnd, nullptr); + } +} + +Operand &AArch64CGFunc::GetOrCreateFuncNameOpnd(const MIRSymbol &symbol) { + return *memPool->New(symbol); +} + +Operand &AArch64CGFunc::GetOrCreateRflag() { + if (rcc == nullptr) { + rcc = &CreateRflagOperand(); + } + return *rcc; +} + +const Operand *AArch64CGFunc::GetRflag() const { + return rcc; +} + +Operand &AArch64CGFunc::GetOrCreatevaryreg() { + if (vary == nullptr) { + regno_t vRegNO = NewVReg(kRegTyVary, k8ByteSize); + vary = &CreateVirtualRegisterOperand(vRegNO); + } + return *vary; +} + +/* the first operand in opndvec is return opnd */ +void AArch64CGFunc::SelectLibCall(const std::string &funcName, std::vector &opndVec, PrimType primType, + PrimType retPrimType, bool is2ndRet) { + MIRSymbol *st = GlobalTables::GetGsymTable().CreateSymbol(kScopeGlobal); + st->SetNameStrIdx(funcName); + st->SetStorageClass(kScExtern); + st->SetSKind(kStFunc); + /* setup the type of the callee function */ + std::vector vec; + std::vector vecAt; + for (size_t i = 1; i < opndVec.size(); ++i) { + vec.emplace_back(GlobalTables::GetTypeTable().GetTypeTable()[static_cast(primType)]->GetTypeIndex()); + vecAt.emplace_back(TypeAttrs()); + } + + MIRType *retType = GlobalTables::GetTypeTable().GetTypeTable().at(static_cast(primType)); + st->SetTyIdx(GetBecommon().BeGetOrCreateFunctionType(retType->GetTypeIndex(), vec, vecAt)->GetTypeIndex()); + + if (GetCG()->GenerateVerboseCG()) { + const std::string &comment = "lib call : " + funcName; + GetCurBB()->AppendInsn(CreateCommentInsn(comment)); + } + + ParmLocator parmLocator(GetBecommon()); + PLocInfo ploc; + ASSERT(primType != PTY_void, "primType check"); + /* setup actual parameters */ + AArch64ListOperand *srcOpnds = memPool->New(*GetFuncScopeAllocator()); + for (size_t i = 1; i < opndVec.size(); ++i) { + MIRType *ty; + ty = GlobalTables::GetTypeTable().GetTypeTable()[static_cast(primType)]; + Operand *stOpnd = opndVec[i]; + if (stOpnd->GetKind() != Operand::kOpdRegister) { + stOpnd = &SelectCopy(*stOpnd, primType, primType); + } + RegOperand *expRegOpnd = static_cast(stOpnd); + parmLocator.LocateNextParm(*ty, ploc); + if (ploc.reg0 != 0) { /* load to the register */ + AArch64RegOperand &parmRegOpnd = + GetOrCreatePhysicalRegisterOperand(ploc.reg0, expRegOpnd->GetSize(), 
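/*
 * Illustrative sketch (not part of the generated code; names are invented):
 * SelectLibCall routes each actual parameter through ParmLocator, which hands
 * out the eight integer argument registers first and then falls back to 8-byte
 * stack slots. A simplified model for scalar integer arguments only (the real
 * locator also handles floating-point registers, aggregates and alignment):
 *
 *   struct ParamLoc { int reg; int stackOffset; };  // reg < 0 means "passed on the stack"
 *   ParamLoc LocateNextIntParam(int &usedIntRegs, int &stackBytes) {
 *     if (usedIntRegs < 8) {
 *       return { usedIntRegs++, -1 };
 *     }
 *     ParamLoc loc{ -1, stackBytes };
 *     stackBytes += 8;  // one 8-byte slot per scalar in this sketch
 *     return loc;
 *   }
 */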
GetRegTyFromPrimTy(primType)); + SelectCopy(parmRegOpnd, primType, *expRegOpnd, primType); + srcOpnds->PushOpnd(parmRegOpnd); + } + ASSERT(ploc.reg1 == 0, "SelectCall NYI"); + } + + MIRSymbol *sym = GetFunction().GetLocalOrGlobalSymbol(st->GetStIdx(), false); + Insn &callInsn = AppendCall(*sym, *srcOpnds); + MIRType *callRetType = GlobalTables::GetTypeTable().GetTypeTable().at(static_cast(retPrimType)); + if (callRetType != nullptr) { + callInsn.SetRetSize(callRetType->GetSize()); + callInsn.SetIsCallReturnUnsigned(IsUnsignedInteger(callRetType->GetPrimType())); + } + GetFunction().SetHasCall(); + /* get return value */ + Operand *opnd0 = opndVec[0]; + ReturnMechanism retMech(*(GlobalTables::GetTypeTable().GetTypeTable().at(retPrimType)), GetBecommon()); + if (retMech.GetRegCount() <= 0) { + CHECK_FATAL(false, "should return from register"); + } + if (!opnd0->IsRegister()) { + CHECK_FATAL(false, "nyi"); + } + RegOperand *regOpnd = static_cast(opnd0); + AArch64reg regNum = is2ndRet ? retMech.GetReg1() : retMech.GetReg0(); + if (regOpnd->GetRegisterNumber() != regNum) { + AArch64RegOperand &retOpnd = GetOrCreatePhysicalRegisterOperand(regNum, regOpnd->GetSize(), + GetRegTyFromPrimTy(retPrimType)); + SelectCopy(*opnd0, retPrimType, retOpnd, retPrimType); + } +} + +Operand *AArch64CGFunc::GetBaseReg(const AArch64SymbolAlloc &symAlloc) { + MemSegmentKind sgKind = symAlloc.GetMemSegment()->GetMemSegmentKind(); + ASSERT(((sgKind == kMsArgsRegPassed) || (sgKind == kMsLocals) || (sgKind == kMsRefLocals) || + (sgKind == kMsArgsToStkPass) || (sgKind == kMsArgsStkPassed)), "NYI"); + + if (sgKind == kMsArgsStkPassed) { + return &GetOrCreatevaryreg(); + } + + if (fsp == nullptr) { + fsp = &GetOrCreatePhysicalRegisterOperand(RFP, kSizeOfPtr * kBitsPerByte, kRegTyInt); + } + return fsp; +} + +int32 AArch64CGFunc::GetBaseOffset(const SymbolAlloc &sa) { + const AArch64SymbolAlloc *symAlloc = static_cast(&sa); + /* Call Frame layout of AArch64 + * Refer to V2 in aarch64_memlayout.h. 
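 *
 * Illustrative sketch (not part of the generated code; constants restated for
 * the example): every segment that sits above the saved FP/LR pair receives
 * the same 2 * kIntregBytelen bias, which is what the sizeofFplr additions
 * below express. For the kMsLocals case, assuming 8-byte integer registers:
 *
 *   #include <cstdint>
 *   constexpr int32_t kIntRegByteLen = 8;                // one saved GPR
 *   constexpr int32_t kSizeOfFpLr = 2 * kIntRegByteLen;  // saved FP + LR pair
 *   int32_t LocalSlotOffset(int32_t offsetInLocals) {
 *     return offsetInLocals + kSizeOfFpLr;
 *   }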
+ * Do Not change this unless you know what you do + */ + const int32 sizeofFplr = 2 * kIntregBytelen; + MemSegmentKind sgKind = symAlloc->GetMemSegment()->GetMemSegmentKind(); + AArch64MemLayout *memLayout = static_cast(this->GetMemlayout()); + if (sgKind == kMsArgsStkPassed) { /* for callees */ + int32 offset = static_cast(symAlloc->GetOffset()); + return offset; + } else if (sgKind == kMsArgsRegPassed) { + int32 baseOffset = memLayout->GetSizeOfLocals() + symAlloc->GetOffset() + memLayout->GetSizeOfRefLocals(); + return baseOffset + sizeofFplr; + } else if (sgKind == kMsRefLocals) { + int32 baseOffset = symAlloc->GetOffset() + memLayout->GetSizeOfLocals(); + return baseOffset + sizeofFplr; + } else if (sgKind == kMsLocals) { + int32 baseOffset = symAlloc->GetOffset(); + return baseOffset + sizeofFplr; + } else if (sgKind == kMsSpillReg) { + int32 baseOffset = symAlloc->GetOffset() + memLayout->SizeOfArgsRegisterPassed() + memLayout->GetSizeOfLocals() + + memLayout->GetSizeOfRefLocals(); + return baseOffset + sizeofFplr; + } else if (sgKind == kMsArgsToStkPass) { /* this is for callers */ + return static_cast(symAlloc->GetOffset()); + } else { + CHECK_FATAL(false, "sgKind check"); + } + return 0; +} + +void AArch64CGFunc::AppendCall(const MIRSymbol &funcSymbol) { + AArch64ListOperand *srcOpnds = memPool->New(*GetFuncScopeAllocator()); + AppendCall(funcSymbol, *srcOpnds); +} + +void AArch64CGFunc::SelectAddAfterInsn(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType, + bool isDest, Insn &insn) { + uint32 dsize = GetPrimTypeBitSize(primType); + bool is64Bits = (dsize == k64BitSize); + ASSERT(opnd0.GetKind() == Operand::kOpdRegister, "Spill memory operand should based on register"); + ASSERT((opnd1.GetKind() == Operand::kOpdImmediate || opnd1.GetKind() == Operand::kOpdOffset), + "Spill memory operand should be with a immediate offset."); + + AArch64ImmOperand *immOpnd = static_cast(&opnd1); + ASSERT(!immOpnd->IsNegative(), "Spill offset should be positive number."); + + MOperator mOpCode = MOP_undef; + /* lower 24 bits has 1, higher bits are all 0 */ + if (immOpnd->IsInBitSize(kMaxImmVal24Bits, 0)) { + /* lower 12 bits and higher 12 bits both has 1 */ + Operand *newOpnd0 = &opnd0; + if (!(immOpnd->IsInBitSize(kMaxImmVal12Bits, 0) || + immOpnd->IsInBitSize(kMaxImmVal12Bits, kMaxImmVal12Bits))) { + /* process higher 12 bits */ + ImmOperand &immOpnd2 = + CreateImmOperand(static_cast(static_cast(immOpnd->GetValue()) >> kMaxImmVal12Bits), + immOpnd->GetSize(), immOpnd->IsSignedValue()); + mOpCode = is64Bits ? MOP_xaddrri24 : MOP_waddrri24; + Insn &newInsn = GetCG()->BuildInstruction(mOpCode, resOpnd, opnd0, immOpnd2, addSubLslOperand); + if (isDest) { + insn.GetBB()->InsertInsnAfter(insn, newInsn); + } else { + insn.GetBB()->InsertInsnBefore(insn, newInsn); + } + /* get lower 12 bits value */ + immOpnd->ModuloByPow2(static_cast(kMaxImmVal12Bits)); + newOpnd0 = &resOpnd; + } + /* process lower 12 bits value */ + mOpCode = is64Bits ? MOP_xaddrri12 : MOP_waddrri12; + Insn &newInsn = GetCG()->BuildInstruction(mOpCode, resOpnd, *newOpnd0, *immOpnd); + if (isDest) { + insn.GetBB()->InsertInsnAfter(insn, newInsn); + } else { + insn.GetBB()->InsertInsnBefore(insn, newInsn); + } + } else { + /* load into register */ + RegOperand ®Opnd = CreateRegisterOperandOfType(primType); + SelectCopyImm(regOpnd, *immOpnd, primType); + mOpCode = is64Bits ? 
MOP_xaddrrr : MOP_waddrrr; + Insn &newInsn = GetCG()->BuildInstruction(mOpCode, resOpnd, opnd0, regOpnd); + if (isDest) { + insn.GetBB()->InsertInsnAfter(insn, newInsn); + } else { + insn.GetBB()->InsertInsnBefore(insn, newInsn); + } + } +} + +MemOperand *AArch64CGFunc::AdjustMemOperandIfOffsetOutOfRange( + MemOperand *memOpnd, regno_t vrNum, bool isDest, Insn &insn, AArch64reg regNum, bool &isOutOfRange) { + if (vrNum >= vRegTable.size()) { + CHECK_FATAL(false, "index out of range in AArch64CGFunc::AdjustMemOperandIfOffsetOutOfRange"); + } + uint32 dataSize = vRegTable[vrNum].GetSize() * kBitsPerByte; + auto *a64MemOpnd = static_cast(memOpnd); + if (IsImmediateOffsetOutOfRange(*a64MemOpnd, dataSize)) { + if (CheckIfSplitOffsetWithAdd(*a64MemOpnd, dataSize)) { + isOutOfRange = true; + } + memOpnd = + &SplitOffsetWithAddInstruction(*a64MemOpnd, dataSize, regNum, isDest, &insn); + } else { + isOutOfRange = false; + } + return memOpnd; +} + +void AArch64CGFunc::FreeSpillRegMem(regno_t vrNum) { + MemOperand *memOpnd = nullptr; + + auto p = spillRegMemOperands.find(vrNum); + if (p != spillRegMemOperands.end()) { + memOpnd = p->second; + } + + if ((memOpnd == nullptr) && IsVRegNOForPseudoRegister(vrNum)) { + auto pSecond = pRegSpillMemOperands.find(GetPseudoRegIdxFromVirtualRegNO(vrNum)); + if (pSecond != pRegSpillMemOperands.end()) { + memOpnd = pSecond->second; + } + } + + if (memOpnd == nullptr) { + ASSERT(false, "free spillreg have no mem"); + return; + } + + uint32 size = memOpnd->GetSize(); + MapleUnorderedMap::iterator iter; + if ((iter = reuseSpillLocMem.find(size)) != reuseSpillLocMem.end()) { + iter->second->Add(*memOpnd); + } else { + reuseSpillLocMem[size] = memPool->New(*GetFuncScopeAllocator()); + reuseSpillLocMem[size]->Add(*memOpnd); + } +} + +MemOperand *AArch64CGFunc::GetOrCreatSpillMem(regno_t vrNum) { + /* NOTES: must used in RA, not used in other place. 
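 *
 * Illustrative sketch (not part of the generated code; the pool type is
 * invented): FreeSpillRegMem above and the lookup below cooperate through a
 * free list of spill slots keyed by operand size, so a released slot can be
 * handed to the next virtual register of the same width. A minimal model:
 *
 *   #include <cstdint>
 *   #include <unordered_map>
 *   #include <vector>
 *   struct SpillSlotPool {
 *     std::unordered_map<uint32_t, std::vector<int32_t>> freeSlots;  // size -> frame offsets
 *     void Release(uint32_t size, int32_t offset) { freeSlots[size].push_back(offset); }
 *     bool Reuse(uint32_t size, int32_t &offset) {
 *       auto it = freeSlots.find(size);
 *       if (it == freeSlots.end() || it->second.empty()) {
 *         return false;
 *       }
 *       offset = it->second.back();
 *       it->second.pop_back();
 *       return true;
 *     }
 *   };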
*/ + if (IsVRegNOForPseudoRegister(vrNum)) { + auto p = pRegSpillMemOperands.find(GetPseudoRegIdxFromVirtualRegNO(vrNum)); + if (p != pRegSpillMemOperands.end()) { + return p->second; + } + } + + auto p = spillRegMemOperands.find(vrNum); + if (p == spillRegMemOperands.end()) { + if (vrNum >= vRegTable.size()) { + CHECK_FATAL(false, "index out of range in AArch64CGFunc::FreeSpillRegMem"); + } + uint32 dataSize = vRegTable[vrNum].GetSize() * kBitsPerByte; + auto it = reuseSpillLocMem.find(dataSize); + if (it != reuseSpillLocMem.end()) { + MemOperand *memOpnd = it->second->GetOne(); + if (memOpnd != nullptr) { + (void)spillRegMemOperands.insert(std::pair(vrNum, memOpnd)); + return memOpnd; + } + } + + RegOperand &baseOpnd = GetOrCreateStackBaseRegOperand(); + int32 offset = GetOrCreatSpillRegLocation(vrNum); + AArch64OfstOperand *offsetOpnd = memPool->New(offset, k64BitSize); + MemOperand *memOpnd = memPool->New(AArch64MemOperand::kAddrModeBOi, dataSize, baseOpnd, + nullptr, offsetOpnd, nullptr); + (void)spillRegMemOperands.insert(std::pair(vrNum, memOpnd)); + return memOpnd; + } else { + return p->second; + } +} + +MemOperand *AArch64CGFunc::GetPseudoRegisterSpillMemoryOperand(PregIdx i) { + MapleUnorderedMap::iterator p; + if (GetCG()->GetOptimizeLevel() == CGOptions::kLevel0) { + p = pRegSpillMemOperands.end(); + } else { + p = pRegSpillMemOperands.find(i); + } + if (p != pRegSpillMemOperands.end()) { + return p->second; + } + int64 offset = GetPseudoRegisterSpillLocation(i); + MIRPreg *preg = GetFunction().GetPregTab()->PregFromPregIdx(i); + uint32 bitLen = GetPrimTypeSize(preg->GetPrimType()) * kBitsPerByte; + RegOperand &base = GetOrCreateFramePointerRegOperand(); + + AArch64OfstOperand &ofstOpnd = GetOrCreateOfstOpnd(offset, k32BitSize); + MemOperand &memOpnd = GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, bitLen, &base, nullptr, &ofstOpnd, nullptr); + if (IsImmediateOffsetOutOfRange(static_cast(memOpnd), bitLen)) { + MemOperand &newMemOpnd = SplitOffsetWithAddInstruction(static_cast(memOpnd), bitLen); + (void)pRegSpillMemOperands.insert(std::pair(i, &newMemOpnd)); + return &newMemOpnd; + } + (void)pRegSpillMemOperands.insert(std::pair(i, &memOpnd)); + return &memOpnd; +} + +/* Get the number of return register of current function. */ +AArch64reg AArch64CGFunc::GetReturnRegisterNumber() { + ReturnMechanism retMech(*(GetFunction().GetReturnType()), GetBecommon()); + if (retMech.GetRegCount() > 0) { + return retMech.GetReg0(); + } + return kRinvalid; +} + +bool AArch64CGFunc::CanLazyBinding(const Insn &ldrInsn) { + Operand &memOpnd = ldrInsn.GetOperand(1); + auto &aarchMemOpnd = static_cast(memOpnd); + if (aarchMemOpnd.GetAddrMode() != AArch64MemOperand::kAddrModeLo12Li) { + return false; + } + + const MIRSymbol *sym = aarchMemOpnd.GetSymbol(); + CHECK_FATAL(sym != nullptr, "sym can't be nullptr"); + if (sym->IsMuidFuncDefTab() || sym->IsMuidFuncUndefTab() || + sym->IsMuidDataDefTab() || sym->IsMuidDataUndefTab() || + (sym->IsReflectionClassInfo() && !sym->IsReflectionArrayClassInfo())) { + return true; + } + + return false; +} + +/* + * add reg, reg, __PTR_C_STR_... + * ldr reg1, [reg] + * => + * ldr reg1, [reg, #:lo12:__Ptr_C_STR_...] 
+ */ +void AArch64CGFunc::ConvertAdrpl12LdrToLdr() { + FOR_ALL_BB(bb, this) { + FOR_BB_INSNS_SAFE(insn, bb, nextInsn) { + nextInsn = insn->GetNextMachineInsn(); + if (nextInsn == nullptr) { + break; + } + if (!insn->IsMachineInstruction()) { + continue; + } + /* check first insn */ + MOperator thisMop = insn->GetMachineOpcode(); + if (thisMop != MOP_xadrpl12) { + continue; + } + /* check second insn */ + MOperator nextMop = nextInsn->GetMachineOpcode(); + if (!(((nextMop >= MOP_wldrsb) && (nextMop <= MOP_dldp)) || ((nextMop >= MOP_wstrb) && (nextMop <= MOP_dstp)))) { + continue; + } + + /* Check if base register of nextInsn and the dest operand of insn are identical. */ + AArch64MemOperand *memOpnd = static_cast(nextInsn->GetMemOpnd()); + CHECK_FATAL(memOpnd != nullptr, "memOpnd can't be nullptr"); + + /* Only for AddrMode_B_OI addressing mode. */ + if (memOpnd->GetAddrMode() != AArch64MemOperand::kAddrModeBOi) { + continue; + } + + /* Only for intact memory addressing. */ + if (!memOpnd->IsIntactIndexed()) { + continue; + } + + auto ®Opnd = static_cast(insn->GetOperand(0)); + + /* Check if dest operand of insn is idential with base register of nextInsn. */ + RegOperand *baseReg = memOpnd->GetBaseRegister(); + CHECK_FATAL(baseReg != nullptr, "baseReg can't be nullptr"); + if (baseReg->GetRegisterNumber() != regOpnd.GetRegisterNumber()) { + continue; + } + + StImmOperand &stImmOpnd = static_cast(insn->GetOperand(kInsnThirdOpnd)); + AArch64OfstOperand &ofstOpnd = GetOrCreateOfstOpnd( + stImmOpnd.GetOffset() + memOpnd->GetOffsetImmediate()->GetOffsetValue(), k32BitSize); + RegOperand &newBaseOpnd = static_cast(insn->GetOperand(kInsnSecondOpnd)); + AArch64MemOperand &newMemOpnd = GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeLo12Li, memOpnd->GetSize(), + &newBaseOpnd, nullptr, &ofstOpnd, stImmOpnd.GetSymbol()); + nextInsn->SetOperand(1, newMemOpnd); + bb->RemoveInsn(*insn); + } + } +} + +/* + * adrp reg1, __muid_func_undef_tab.. + * ldr reg2, [reg1, #:lo12:__muid_func_undef_tab..] + * => + * intrinsic_adrp_ldr reg2, __muid_func_undef_tab... 
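 *
 * Illustrative sketch (not part of the generated code; the toy record and
 * names are invented): the rewrite below is a peephole over adjacent
 * instruction pairs, fusing an adrp with the dependent ldr when the base
 * register and symbol match. The real pass additionally checks lazy-binding
 * symbols and machine-instruction properties.
 *
 *   #include <cstddef>
 *   #include <string>
 *   #include <vector>
 *   struct ToyInsn { std::string op; int dst; int base; std::string sym; };
 *   void FuseAdrpLdr(std::vector<ToyInsn> &code) {
 *     for (size_t i = 0; i + 1 < code.size(); ++i) {
 *       ToyInsn &first = code[i];
 *       ToyInsn &second = code[i + 1];
 *       if (first.op == "adrp" && second.op == "ldr" &&
 *           second.base == first.dst && second.sym == first.sym) {
 *         first = { "adrp_ldr", second.dst, -1, first.sym };
 *         code.erase(code.begin() + static_cast<std::ptrdiff_t>(i) + 1);
 *       }
 *     }
 *   }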
+ */ +void AArch64CGFunc::ConvertAdrpLdrToIntrisic() { + FOR_ALL_BB(bb, this) { + FOR_BB_INSNS_SAFE(insn, bb, nextInsn) { + nextInsn = insn->GetNextMachineInsn(); + if (nextInsn == nullptr) { + break; + } + if (!insn->IsMachineInstruction()) { + continue; + } + + MOperator firstMop = insn->GetMachineOpcode(); + MOperator secondMop = nextInsn->GetMachineOpcode(); + if (!((firstMop == MOP_xadrp) && ((secondMop == MOP_wldr) || (secondMop == MOP_xldr)))) { + continue; + } + + if (CanLazyBinding(*nextInsn)) { + bb->ReplaceInsn(*insn, GetCG()->BuildInstruction(MOP_adrp_ldr, nextInsn->GetOperand(0), + insn->GetOperand(1))); + bb->RemoveInsn(*nextInsn); + } + } + } +} + +void AArch64CGFunc::ProcessLazyBinding() { + ConvertAdrpl12LdrToLdr(); + ConvertAdrpLdrToIntrisic(); +} + +/* + * Generate global long call + * adrp VRx, symbol + * ldr VRx, [VRx, #:lo12:symbol] + * blr VRx + * + * Input: + * insn : insert new instruction after the 'insn' + * func : the symbol of the function need to be called + * srcOpnds : list operand of the function need to be called + * isCleanCall: when generate clean call insn, set isCleanCall as true + * Return: the 'blr' instruction + */ +Insn &AArch64CGFunc::GenerateGlobalLongCallAfterInsn(const MIRSymbol &func, AArch64ListOperand &srcOpnds, + bool isCleanCall) { + MIRSymbol *symbol = GetFunction().GetLocalOrGlobalSymbol(func.GetStIdx()); + symbol->SetStorageClass(kScGlobal); + RegOperand &tmpReg = CreateRegisterOperandOfType(PTY_u64); + StImmOperand &stOpnd = CreateStImmOperand(*symbol, 0, 0); + AArch64OfstOperand &offsetOpnd = CreateOfstOpnd(*symbol, 0); + Insn &adrpInsn = GetCG()->BuildInstruction(MOP_xadrp, tmpReg, stOpnd); + GetCurBB()->AppendInsn(adrpInsn); + AArch64MemOperand &memOrd = GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeLo12Li, kSizeOfPtr * kBitsPerByte, + static_cast(&tmpReg), + nullptr, &offsetOpnd, symbol); + Insn &ldrInsn = GetCG()->BuildInstruction(MOP_xldr, tmpReg, memOrd); + GetCurBB()->AppendInsn(ldrInsn); + + if (isCleanCall) { + Insn &callInsn = GetCG()->BuildInstruction(MOP_xblr, tmpReg, srcOpnds); + GetCurBB()->AppendInsn(callInsn); + GetCurBB()->SetHasCall(); + return callInsn; + } else { + Insn &callInsn = GetCG()->BuildInstruction(MOP_xblr, tmpReg, srcOpnds); + GetCurBB()->AppendInsn(callInsn); + GetCurBB()->SetHasCall(); + return callInsn; + } +} + +/* + * Generate local long call + * adrp VRx, symbol + * add VRx, VRx, #:lo12:symbol + * blr VRx + * + * Input: + * insn : insert new instruction after the 'insn' + * func : the symbol of the function need to be called + * srcOpnds : list operand of the function need to be called + * isCleanCall: when generate clean call insn, set isCleanCall as true + * Return: the 'blr' instruction + */ +Insn &AArch64CGFunc::GenerateLocalLongCallAfterInsn(const MIRSymbol &func, AArch64ListOperand &srcOpnds, + bool isCleanCall) { + RegOperand &tmpReg = CreateRegisterOperandOfType(PTY_u64); + StImmOperand &stOpnd = CreateStImmOperand(func, 0, 0); + Insn &adrpInsn = GetCG()->BuildInstruction(MOP_xadrp, tmpReg, stOpnd); + GetCurBB()->AppendInsn(adrpInsn); + Insn &addInsn = GetCG()->BuildInstruction(MOP_xadrpl12, tmpReg, tmpReg, stOpnd); + GetCurBB()->AppendInsn(addInsn); + Insn *callInsn = nullptr; + if (isCleanCall) { + callInsn = &GetCG()->BuildInstruction(MOP_xblr, tmpReg, srcOpnds); + GetCurBB()->AppendInsn(*callInsn); + } else { + callInsn = &GetCG()->BuildInstruction(MOP_xblr, tmpReg, srcOpnds); + GetCurBB()->AppendInsn(*callInsn); + } + GetCurBB()->SetHasCall(); + return *callInsn; +} + +Insn 
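/*
 * Illustrative sketch (not part of the generated code; the enum and flags are
 * invented): AppendCall below chooses between a direct bl, the local long-call
 * form (adrp + add + blr) and the global long-call form (adrp + ldr + blr)
 * generated just above. Reduced to two flags (the real check also treats
 * duplicated asm-list functions as local):
 *
 *   enum class CallForm { kDirectBl, kLocalLong, kGlobalLong };
 *   CallForm PickCallForm(bool longCallsEnabled, bool calleeIsLocal) {
 *     if (!longCallsEnabled) {
 *       return CallForm::kDirectBl;
 *     }
 *     return calleeIsLocal ? CallForm::kLocalLong : CallForm::kGlobalLong;
 *   }
 */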
&AArch64CGFunc::AppendCall(const MIRSymbol &sym, AArch64ListOperand &srcOpnds, bool isCleanCall) { + Insn *callInsn = nullptr; + if (CGOptions::IsLongCalls()) { + MIRFunction *mirFunc = sym.GetFunction(); + if (IsDuplicateAsmList(sym) || (mirFunc && mirFunc->GetAttr(FUNCATTR_local))) { + callInsn = &GenerateLocalLongCallAfterInsn(sym, srcOpnds, isCleanCall); + } else { + callInsn = &GenerateGlobalLongCallAfterInsn(sym, srcOpnds, isCleanCall); + } + } else { + Operand &targetOpnd = GetOrCreateFuncNameOpnd(sym); + if (isCleanCall) { + callInsn = &GetCG()->BuildInstruction(MOP_xbl, targetOpnd, srcOpnds); + GetCurBB()->AppendInsn(*callInsn); + } else { + callInsn = &GetCG()->BuildInstruction(MOP_xbl, targetOpnd, srcOpnds); + GetCurBB()->AppendInsn(*callInsn); + } + GetCurBB()->SetHasCall(); + } + return *callInsn; +} + +bool AArch64CGFunc::IsDuplicateAsmList(const MIRSymbol &sym) const { + if (CGOptions::IsDuplicateAsmFileEmpty()) { + return false; + } + + const std::string &name = sym.GetName(); + if ((name == "strlen") || + (name == "strncmp") || + (name == "memcpy") || + (name == "memmove") || + (name == "strcmp") || + (name == "memcmp") || + (name == "memcmpMpl")) { + return true; + } + return false; +} + +void AArch64CGFunc::SelectMPLProfCounterInc(IntrinsiccallNode &intrnNode) { + ASSERT(intrnNode.NumOpnds() == 1, "must be 1 operand"); + BaseNode *arg1 = intrnNode.Opnd(0); + ASSERT(arg1 != nullptr, "nullptr check"); + regno_t vRegNO1 = NewVReg(GetRegTyFromPrimTy(PTY_a64), GetPrimTypeSize(PTY_a64)); + RegOperand &vReg1 = CreateVirtualRegisterOperand(vRegNO1); + vReg1.SetRegNotBBLocal(); + static MIRSymbol *bbProfileTab = nullptr; + if (!bbProfileTab) { + std::string bbProfileName = namemangler::kBBProfileTabPrefixStr + GetMirModule().GetFileNameAsPostfix(); + bbProfileTab = GetMirModule().GetMIRBuilder()->GetGlobalDecl(bbProfileName); + CHECK_FATAL(bbProfileTab != nullptr, "expect bb profile tab"); + } + ConstvalNode *constvalNode = static_cast(arg1); + MIRConst *mirConst = constvalNode->GetConstVal(); + ASSERT(mirConst != nullptr, "nullptr check"); + CHECK_FATAL(mirConst->GetKind() == kConstInt, "expect MIRIntConst type"); + MIRIntConst *mirIntConst = safe_cast(mirConst); + uint32 idx = GetPrimTypeSize(PTY_u32) * mirIntConst->GetValue(); + if (!GetCG()->IsQuiet()) { + maple::LogInfo::MapleLogger(kLlErr) << "Id index " << idx << std::endl; + } + StImmOperand &stOpnd = CreateStImmOperand(*bbProfileTab, idx, 0); + Insn &newInsn = GetCG()->BuildInstruction(MOP_counter, vReg1, stOpnd); + newInsn.SetDoNotRemove(true); + GetCurBB()->AppendInsn(newInsn); +} + +void AArch64CGFunc::SelectMPLClinitCheck(IntrinsiccallNode &intrnNode) { + ASSERT(intrnNode.NumOpnds() == 1, "must be 1 operand"); + BaseNode *arg = intrnNode.Opnd(0); + Operand *stOpnd = nullptr; + bool bClinitSeperate = false; + ASSERT(CGOptions::IsPIC(), "must be doPIC"); + if (arg->GetOpCode() == OP_addrof) { + AddrofNode *addrof = static_cast(arg); + MIRSymbol *symbol = GetFunction().GetLocalOrGlobalSymbol(addrof->GetStIdx()); + ASSERT(symbol->GetName().find(CLASSINFO_PREFIX_STR) == 0, "must be a symbol with __classinfo__"); + + if (!symbol->IsMuidDataUndefTab()) { + std::string ptrName = namemangler::kPtrPrefixStr + symbol->GetName(); + MIRType *ptrType = GlobalTables::GetTypeTable().GetPtr(); + symbol = GetMirModule().GetMIRBuilder()->GetOrCreateGlobalDecl(ptrName, *ptrType); + bClinitSeperate = true; + symbol->SetStorageClass(kScFstatic); + } + stOpnd = &CreateStImmOperand(*symbol, 0, 0); + } else { + arg = arg->Opnd(0); + BaseNode 
*arg0 = arg->Opnd(0); + BaseNode *arg1 = arg->Opnd(1); + ASSERT(arg0 != nullptr, "nullptr check"); + ASSERT(arg1 != nullptr, "nullptr check"); + ASSERT(arg0->GetOpCode() == OP_addrof, "expect the operand to be addrof"); + AddrofNode *addrof = static_cast(arg0); + MIRSymbol *symbol = GetFunction().GetLocalOrGlobalSymbol(addrof->GetStIdx()); + ASSERT(addrof->GetFieldID() == 0, "For debug SelectMPLClinitCheck."); + ConstvalNode *constvalNode = static_cast(arg1); + MIRConst *mirConst = constvalNode->GetConstVal(); + ASSERT(mirConst != nullptr, "nullptr check"); + CHECK_FATAL(mirConst->GetKind() == kConstInt, "expect MIRIntConst type"); + MIRIntConst *mirIntConst = safe_cast(mirConst); + stOpnd = &CreateStImmOperand(*symbol, mirIntConst->GetValue(), 0); + } + + regno_t vRegNO2 = NewVReg(GetRegTyFromPrimTy(PTY_a64), GetPrimTypeSize(PTY_a64)); + RegOperand &vReg2 = CreateVirtualRegisterOperand(vRegNO2); + vReg2.SetRegNotBBLocal(); + if (bClinitSeperate) { + /* Seperate MOP_clinit to MOP_adrp_ldr + MOP_clinit_tail. */ + Insn &newInsn = GetCG()->BuildInstruction(MOP_adrp_ldr, vReg2, *stOpnd); + GetCurBB()->AppendInsn(newInsn); + newInsn.SetDoNotRemove(true); + Insn &insn = GetCG()->BuildInstruction(MOP_clinit_tail, vReg2); + insn.SetDoNotRemove(true); + GetCurBB()->AppendInsn(insn); + } else { + Insn &newInsn = GetCG()->BuildInstruction(MOP_clinit, vReg2, *stOpnd); + GetCurBB()->AppendInsn(newInsn); + } +} +void AArch64CGFunc::GenCVaStartIntrin(RegOperand &opnd, uint32 stkSize) { + /* FPLR only pushed in regalloc() after intrin function */ + Operand &stkOpnd = GetOrCreatePhysicalRegisterOperand(RFP, k64BitSize, kRegTyInt); + + /* __stack */ + AArch64ImmOperand *offsOpnd = &CreateImmOperand(0, k64BitSize, true, kUnAdjustVary); /* isvary reset StackFrameSize */ + AArch64ImmOperand *offsOpnd2 = &CreateImmOperand(stkSize, k64BitSize, false); + RegOperand &vReg = CreateVirtualRegisterOperand(NewVReg(kRegTyInt, GetPrimTypeSize(PTY_a64))); + if (stkSize) { + SelectAdd(vReg, *offsOpnd, *offsOpnd2, PTY_a64); + SelectAdd(vReg, stkOpnd, vReg, PTY_a64); + } else { + SelectAdd(vReg, stkOpnd, *offsOpnd, PTY_a64); + } + AArch64OfstOperand *offOpnd = &GetOrCreateOfstOpnd(0, k64BitSize); + /* mem operand in va_list struct (lhs) */ + MemOperand *strOpnd = &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, k64BitSize, &opnd, nullptr, + offOpnd, static_cast(nullptr)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xstr, vReg, *strOpnd)); + + /* __gr_top ; it's the same as __stack before the 1st va_arg */ + offOpnd = &GetOrCreateOfstOpnd(k8BitSize, k64BitSize); + strOpnd = &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, k64BitSize, &opnd, nullptr, + offOpnd, static_cast(nullptr)); + SelectAdd(vReg, stkOpnd, *offsOpnd, PTY_a64); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xstr, vReg, *strOpnd)); + + /* __vr_top */ + int32 grAreaSize = static_cast(GetMemlayout())->GetSizeOfGRSaveArea(); + offsOpnd2 = &CreateImmOperand(RoundUp(grAreaSize, kSizeOfPtr * 2), k64BitSize, false); + SelectSub(vReg, *offsOpnd, *offsOpnd2, PTY_a64); /* if 1st opnd is register => sub */ + SelectAdd(vReg, stkOpnd, vReg, PTY_a64); + offOpnd = &GetOrCreateOfstOpnd(k16BitSize, k64BitSize); + strOpnd = &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, k64BitSize, &opnd, nullptr, + offOpnd, static_cast(nullptr)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xstr, vReg, *strOpnd)); + + /* __gr_offs */ + int32 offs = 0 - grAreaSize; + offsOpnd = &CreateImmOperand(offs, k32BitSize, false); + RegOperand *tmpReg = 
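/*
 * Illustrative sketch (not part of the generated code; field names are
 * invented and the layout assumes 8-byte pointers): the stores issued by
 * GenCVaStartIntrin fill the five va_list fields at offsets 0, 8, 16, 24 and
 * 28. Written out as a C++ struct:
 *
 *   #include <cstdint>
 *   struct VaListModel {
 *     void *stack;     // offset 0:  next stacked (overflow) argument
 *     void *grTop;     // offset 8:  end of the general-register save area
 *     void *vrTop;     // offset 16: end of the FP/SIMD register save area
 *     int32_t grOffs;  // offset 24: negative byte count left in the GP save area
 *     int32_t vrOffs;  // offset 28: negative byte count left in the FP/SIMD save area
 *   };
 */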
&CreateRegisterOperandOfType(PTY_i32); /* offs value to be assigned (rhs) */ + SelectCopyImm(*tmpReg, *offsOpnd, PTY_i32); + offOpnd = &GetOrCreateOfstOpnd(kSizeOfPtr * 3, k32BitSize); + strOpnd = &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, k32BitSize, &opnd, nullptr, + offOpnd, static_cast(nullptr)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_wstr, *tmpReg, *strOpnd)); + + /* __vr_offs */ + offs = 0 - static_cast(GetMemlayout())->GetSizeOfVRSaveArea(); + offsOpnd = &CreateImmOperand(offs, k32BitSize, false); + tmpReg = &CreateRegisterOperandOfType(PTY_i32); + SelectCopyImm(*tmpReg, *offsOpnd, PTY_i32); + offOpnd = &GetOrCreateOfstOpnd((kSizeOfPtr * 3 + sizeof(int32)), k32BitSize); + strOpnd = &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, k32BitSize, &opnd, nullptr, + offOpnd, static_cast(nullptr)); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_wstr, *tmpReg, *strOpnd)); +} + +void AArch64CGFunc::SelectCVaStart(const IntrinsiccallNode &intrnNode) { + ASSERT(intrnNode.NumOpnds() == 2, "must be 2 operands"); + /* 2 operands, but only 1 needed. Don't need to emit code for second operand + * + * va_list is a passed struct with an address, load its address + */ + BaseNode *argExpr = intrnNode.Opnd(0); + Operand *opnd = HandleExpr(intrnNode, *argExpr); + RegOperand &opnd0 = LoadIntoRegister(*opnd, PTY_a64); /* first argument of intrinsic */ + + /* Find beginning of unnamed arg on stack. + * Ex. void foo(int i1, int i2, ... int i8, struct S r, struct S s, ...) + * where struct S has size 32, address of r and s are on stack but they are named. + */ + ParmLocator parmLocator(GetBecommon()); + PLocInfo pLoc; + uint32 stkSize = 0; + for (uint32 i = 0; i < GetFunction().GetFormalCount(); i++) { + MIRType *ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(GetFunction().GetNthParamTyIdx(i)); + parmLocator.LocateNextParm(*ty, pLoc); + if (pLoc.reg0 == kRinvalid) { /* on stack */ + stkSize = pLoc.memOffset + pLoc.memSize; + } + } + stkSize = RoundUp(stkSize, kSizeOfPtr); + + GenCVaStartIntrin(opnd0, stkSize); + + return; +} + +void AArch64CGFunc::SelectIntrinCall(IntrinsiccallNode &intrinsiccallNode) { + MIRIntrinsicID intrinsic = intrinsiccallNode.GetIntrinsic(); + + if (GetCG()->GenerateVerboseCG()) { + std::string comment = GetIntrinsicName(intrinsic); + GetCurBB()->AppendInsn(CreateCommentInsn(comment)); + } + + /* + * At this moment, we eagerly evaluates all argument expressions. In theory, + * there could be intrinsics that extract meta-information of variables, such as + * their locations, rather than computing their values. Applications + * include building stack maps that help runtime libraries to find the values + * of local variables (See @stackmap in LLVM), in which case knowing their + * locations will suffice. + */ + if (intrinsic == INTRN_MPL_CLINIT_CHECK) { /* special case */ + SelectMPLClinitCheck(intrinsiccallNode); + return; + } + if (intrinsic == INTRN_MPL_PROF_COUNTER_INC) { /* special case */ + SelectMPLProfCounterInc(intrinsiccallNode); + return; + } + if ((intrinsic == INTRN_MPL_CLEANUP_LOCALREFVARS) || (intrinsic == INTRN_MPL_CLEANUP_LOCALREFVARS_SKIP) || + (intrinsic == INTRN_MPL_CLEANUP_NORETESCOBJS)) { + return; + } + if (intrinsic == INTRN_C_va_start) { + SelectCVaStart(intrinsiccallNode); + return; + } + std::vector operands; /* Temporary. Deallocated on return. 
*/ + AArch64ListOperand *srcOpnds = memPool->New(*GetFuncScopeAllocator()); + for (size_t i = 0; i < intrinsiccallNode.NumOpnds(); i++) { + BaseNode *argExpr = intrinsiccallNode.Opnd(i); + Operand *opnd = HandleExpr(intrinsiccallNode, *argExpr); + operands.emplace_back(opnd); + if (!opnd->IsRegister()) { + opnd = &LoadIntoRegister(*opnd, argExpr->GetPrimType()); + } + RegOperand *expRegOpnd = static_cast(opnd); + srcOpnds->PushOpnd(*expRegOpnd); + } + CallReturnVector *retVals = &intrinsiccallNode.GetReturnVec(); + + switch (intrinsic) { + case INTRN_MPL_ATOMIC_EXCHANGE_PTR: { + BB *origFtBB = GetCurBB()->GetNext(); + Operand *loc = operands[kInsnFirstOpnd]; + Operand *newVal = operands[kInsnSecondOpnd]; + Operand *memOrd = operands[kInsnThirdOpnd]; + + MemOrd ord = OperandToMemOrd(*memOrd); + bool isAcquire = MemOrdIsAcquire(ord); + bool isRelease = MemOrdIsRelease(ord); + + const PrimType kValPrimType = PTY_a64; + + RegOperand &locReg = LoadIntoRegister(*loc, PTY_a64); + /* Because there is no live analysis when -O1 */ + if (Globals::GetInstance()->GetOptimLevel() == 0) { + locReg.SetRegNotBBLocal(); + } + AArch64MemOperand &locMem = GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, + k64BitSize, &locReg, nullptr, &GetOrCreateOfstOpnd(0, k32BitSize), nullptr); + RegOperand &newValReg = LoadIntoRegister(*newVal, PTY_a64); + if (Globals::GetInstance()->GetOptimLevel() == 0) { + newValReg.SetRegNotBBLocal(); + } + GetCurBB()->SetKind(BB::kBBFallthru); + + LabelIdx retryLabIdx = CreateLabeledBB(intrinsiccallNode); + + RegOperand *oldVal = SelectLoadExcl(kValPrimType, locMem, isAcquire); + if (Globals::GetInstance()->GetOptimLevel() == 0) { + oldVal->SetRegNotBBLocal(); + } + RegOperand *succ = SelectStoreExcl(kValPrimType, locMem, newValReg, isRelease); + if (Globals::GetInstance()->GetOptimLevel() == 0) { + succ->SetRegNotBBLocal(); + } + + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_wcbnz, *succ, + GetOrCreateLabelOperand(retryLabIdx))); + GetCurBB()->SetKind(BB::kBBIntrinsic); + GetCurBB()->SetNext(origFtBB); + + SaveReturnValueInLocal(*retVals, 0, kValPrimType, *oldVal, intrinsiccallNode); + break; + } + case INTRN_GET_AND_ADDI: { + IntrinsifyGetAndAddInt(*srcOpnds, PTY_i32); + break; + } + case INTRN_GET_AND_ADDL: { + IntrinsifyGetAndAddInt(*srcOpnds, PTY_i64); + break; + } + case INTRN_GET_AND_SETI: { + IntrinsifyGetAndSetInt(*srcOpnds, PTY_i32); + break; + } + case INTRN_GET_AND_SETL: { + IntrinsifyGetAndSetInt(*srcOpnds, PTY_i64); + break; + } + case INTRN_COMP_AND_SWAPI: { + IntrinsifyCompareAndSwapInt(*srcOpnds, PTY_i32); + break; + } + case INTRN_COMP_AND_SWAPL: { + IntrinsifyCompareAndSwapInt(*srcOpnds, PTY_i64); + break; + } + default: { + CHECK_FATAL(false, "Intrinsic %d: %s not implemented by the AArch64 CG.", intrinsic, GetIntrinsicName(intrinsic)); + break; + } + } +} + +/* + * NOTE: consider moving the following things into aarch64_cg.cpp They may + * serve not only inrinsics, but other MapleIR instructions as well. + * Do it as if we are adding a label in straight-line assembly code. 
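 *
 * Illustrative sketch (not part of the generated code; the helper name is
 * invented): the load-exclusive/store-exclusive retry loop built for
 * INTRN_MPL_ATOMIC_EXCHANGE_PTR above has the semantics of a plain atomic
 * exchange; in portable C++ the same operation is:
 *
 *   #include <atomic>
 *   void *AtomicExchangePtr(std::atomic<void*> &loc, void *newVal, std::memory_order ord) {
 *     return loc.exchange(newVal, ord);  // hardware expands this to an exclusive-access loop
 *   }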
+ */ +LabelIdx AArch64CGFunc::CreateLabeledBB(StmtNode &stmt) { + LabelIdx labIdx = CreateLabel(); + BB *newBB = StartNewBBImpl(false, stmt); + newBB->AddLabel(labIdx); + SetLab2BBMap(labIdx, *newBB); + SetCurBB(*newBB); + return labIdx; +} + +/* Save value into the local variable for the index-th return value; */ +void AArch64CGFunc::SaveReturnValueInLocal(CallReturnVector &retVals, size_t index, PrimType primType, Operand &value, + StmtNode &parentStmt) { + CallReturnPair &pair = retVals.at(index); + BB tempBB(static_cast(-1), *GetFuncScopeAllocator()); + BB *realCurBB = GetCurBB(); + SetCurBB(tempBB); + CHECK_FATAL(!pair.second.IsReg(), "NYI"); + SelectDassign(pair.first, pair.second.GetFieldID(), primType, value); + CHECK_FATAL(realCurBB->GetNext() == nullptr, "current BB must has not nextBB"); + realCurBB->SetLastStmt(parentStmt); + realCurBB->SetNext(StartNewBBImpl(true, parentStmt)); + realCurBB->GetNext()->SetKind(BB::kBBFallthru); + realCurBB->GetNext()->SetPrev(realCurBB); + + realCurBB->GetNext()->InsertAtBeginning(*GetCurBB()); + /* restore it */ + SetCurBB(*realCurBB->GetNext()); +} + +/* The following are translation of LL/SC and atomic RMW operations */ +MemOrd AArch64CGFunc::OperandToMemOrd(Operand &opnd) { + CHECK_FATAL(opnd.IsImmediate(), "Memory order must be an int constant."); + auto immOpnd = static_cast(&opnd); + int32 val = immOpnd->GetValue(); + CHECK_FATAL(val >= 0, "val must be non-negtive"); + return MemOrdFromU32(static_cast(val)); +} + +/* + * Generate ldxr or ldaxr instruction. + * byte_p2x: power-of-2 size of operand in bytes (0: 1B, 1: 2B, 2: 4B, 3: 8B). + */ +MOperator AArch64CGFunc::PickLoadStoreExclInsn(uint32 byteP2Size, bool store, bool acqRel) const { + CHECK_FATAL(byteP2Size < kIntByteSizeDimension, "Illegal argument p2size: %d", byteP2Size); + + static MOperator operators[4][2][2] = { { { MOP_wldxrb, MOP_wldaxrb }, { MOP_wstxrb, MOP_wstlxrb } }, + { { MOP_wldxrh, MOP_wldaxrh }, { MOP_wstxrh, MOP_wstlxrh } }, + { { MOP_wldxr, MOP_wldaxr }, { MOP_wstxr, MOP_wstlxr } }, + { { MOP_xldxr, MOP_xldaxr }, { MOP_xstxr, MOP_xstlxr } } }; + + MOperator optr = operators[byteP2Size][store][acqRel]; + CHECK_FATAL(optr != MOP_undef, "Unsupported type p2size: %d", byteP2Size); + + return optr; +} + +RegOperand *AArch64CGFunc::SelectLoadExcl(PrimType valPrimType, AArch64MemOperand &loc, bool acquire) { + uint32 p2size = GetPrimTypeP2Size(valPrimType); + + RegOperand &result = CreateRegisterOperandOfType(valPrimType); + MOperator mOp = PickLoadStoreExclInsn(p2size, false, acquire); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, result, loc)); + + return &result; +} + +RegOperand *AArch64CGFunc::SelectStoreExcl(PrimType valPty, AArch64MemOperand &loc, RegOperand &newVal, bool release) { + uint32 p2size = GetPrimTypeP2Size(valPty); + + /* the result (success/fail) is to be stored in a 32-bit register */ + RegOperand &result = CreateRegisterOperandOfType(PTY_u32); + + MOperator mOp = PickLoadStoreExclInsn(p2size, true, release); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mOp, result, newVal, loc)); + + return &result; +} + +RegType AArch64CGFunc::GetRegisterType(regno_t reg) const { + if (AArch64isa::IsPhysicalRegister(reg)) { + return AArch64isa::GetRegType(static_cast(reg)); + } else if (reg == kRFLAG) { + return kRegTyCc; + } else { + return CGFunc::GetRegisterType(reg); + } +} + +MemOperand &AArch64CGFunc::LoadStructCopyBase(const MIRSymbol &symbol, int32 offset, int dataSize) { + /* For struct formals > 16 bytes, this is the pointer to the struct 
copy. */ + /* Load the base pointer first. */ + RegOperand *vreg = &CreateVirtualRegisterOperand(NewVReg(kRegTyInt, k8ByteSize)); + MemOperand *baseMemOpnd = &GetOrCreateMemOpnd(symbol, 0, k64BitSize); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(PickLdInsn(k64BitSize, PTY_i64), *vreg, *baseMemOpnd)); + /* Create the indirect load mem opnd from the base pointer. */ + return CreateMemOpnd(*vreg, offset, dataSize); +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_color_ra.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_color_ra.cpp new file mode 100644 index 0000000000000000000000000000000000000000..14579c426973d88a305bf4239ba1d874107ab38a --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_color_ra.cpp @@ -0,0 +1,3108 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "riscv64_color_ra.h" +#include +#include +#include "riscv64_cg.h" +#include "mir_lower.h" +#include "securec.h" + +/* + * Based on concepts from Chow and Hennessey. + * Phases are as follows: + * Prepass to collect local BB information. + * Compute local register allocation demands for global RA. + * Compute live ranges. + * Live ranges LR represented by a vector of size #BBs. + * for each cross bb vreg, a bit is set in the vector. + * Build interference graph with basic block as granularity. + * When intersection of two LRs is not null, they interfere. + * Separate unconstrained and constrained LRs. + * unconstrained - LR with connect edges less than available colors. + * These LR can always be colored. + * constrained - not uncontrained. + * Split LR based on priority cost + * Repetitive adding BB from original LR to new LR until constrained. + * Update all LR the new LR interferes with. + * Color the new LR + * Each LR has a forbidden list, the registers cannot be assigned + * Coalesce move using preferred color first. + * Mark the remaining uncolorable LR after split as spill. + * Local register allocate. + * Emit and insert spills. + */ +namespace maplebe { +#define JAVALANG (cgFunc->GetMirModule().IsJavaModule()) + +/* + * for physical regOpnd phyOpnd, + * R0->GetRegisterNumber() == 1 + * V0->GetRegisterNumber() == 33 + */ +constexpr uint32 kLoopWeight = 10; +constexpr uint32 kAdjustWeight = 2; + +#define GCRA_DUMP CG_DEBUG_FUNC(cgFunc) + +void LiveUnit::PrintLiveUnit() const { + LogInfo::MapleLogger() << "[" << begin << "," << end << "]" + << ""; + if (!hasCall) { + /* Too many calls, so only print when there is no call. 
*/ + LogInfo::MapleLogger() << " nc"; + } + if (needReload) { + LogInfo::MapleLogger() << " rlod"; + } + if (needRestore) { + LogInfo::MapleLogger() << " rstr"; + } +} + +template +void GraphColorRegAllocator::ForEachBBArrElem(const uint64 *vec, Func functor) const { + for (uint32 iBBArrElem = 0; iBBArrElem < bbBuckets; ++iBBArrElem) { + for (uint32 bBBArrElem = 0; bBBArrElem < kU64; ++bBBArrElem) { + if ((vec[iBBArrElem] & (1ULL << bBBArrElem)) != 0) { + functor(iBBArrElem * kU64 + bBBArrElem); + } + } + } +} + +template +void GraphColorRegAllocator::ForEachBBArrElemWithInterrupt(const uint64 *vec, Func functor) const { + for (uint32 iBBArrElem = 0; iBBArrElem < bbBuckets; ++iBBArrElem) { + for (uint32 bBBArrElem = 0; bBBArrElem < kU64; ++bBBArrElem) { + if ((vec[iBBArrElem] & (1ULL << bBBArrElem)) != 0) { + if (functor(iBBArrElem * kU64 + bBBArrElem)) { + return; + } + } + } + } +} + +template +void GraphColorRegAllocator::ForEachRegArrElem(const uint64 *vec, Func functor) const { + for (uint32 iBBArrElem = 0; iBBArrElem < regBuckets; ++iBBArrElem) { + for (uint32 bBBArrElem = 0; bBBArrElem < kU64; ++bBBArrElem) { + if ((vec[iBBArrElem] & (1ULL << bBBArrElem)) != 0) { + functor(iBBArrElem * kU64 + bBBArrElem); + } + } + } +} + +void GraphColorRegAllocator::PrintLiveUnitMap(const LiveRange &lr) const { + LogInfo::MapleLogger() << "\n\tlu:"; + for (uint32 i = 0; i < cgFunc->NumBBs(); ++i) { + if (!IsBitArrElemSet(lr.GetBBMember(), i)) { + continue; + } + auto lu = lr.GetLuMap().find(i); + if (lu != lr.GetLuMap().end()) { + LogInfo::MapleLogger() << "(" << i << " "; + lu->second->PrintLiveUnit(); + LogInfo::MapleLogger() << ")"; + } + } + LogInfo::MapleLogger() << "\n"; +} + +void GraphColorRegAllocator::PrintLiveRangeConflicts(const LiveRange &lr) const { + LogInfo::MapleLogger() << "\n\tinterfere(" << lr.GetNumBBConflicts() << "): "; + for (uint32 i = 0; i < regBuckets; ++i) { + uint64 chunk = lr.GetBBConflictElem(i); + for (uint64 bit = 0; bit < kU64; ++bit) { + if (chunk & (1ULL << bit)) { + regno_t newNO = i * kU64 + bit; + LogInfo::MapleLogger() << newNO << ","; + } + } + } + LogInfo::MapleLogger() << "\n"; +} + +void GraphColorRegAllocator::PrintLiveBBBit(const LiveRange &lr) const { + LogInfo::MapleLogger() << "live_bb(" << lr.GetNumBBMembers() << "): "; + for (uint32 i = 0; i < cgFunc->NumBBs(); ++i) { + if (IsBitArrElemSet(lr.GetBBMember(), i)) { + LogInfo::MapleLogger() << i << " "; + } + } + LogInfo::MapleLogger() << "\n"; +} + +void GraphColorRegAllocator::PrintLiveRange(const LiveRange &lr, const std::string &str) const { + LogInfo::MapleLogger() << str << "\n"; + + LogInfo::MapleLogger() << "R" << lr.GetRegNO(); + if (lr.GetRegType() == kRegTyInt) { + LogInfo::MapleLogger() << "(I)"; + } else if (lr.GetRegType() == kRegTyFloat) { + LogInfo::MapleLogger() << "(F)"; + } else { + LogInfo::MapleLogger() << "(U)"; + } + LogInfo::MapleLogger() << "\tnumCall " << lr.GetNumCall(); + LogInfo::MapleLogger() << "\tpriority " << lr.GetPriority(); + LogInfo::MapleLogger() << "\tforbidden: "; + for (regno_t preg = kInvalidRegNO; preg < kMaxRegNum; preg++) { + if (lr.GetForbidden(preg)) { + LogInfo::MapleLogger() << preg << ","; + } + } + LogInfo::MapleLogger() << "\tpregveto: "; + for (regno_t preg = kInvalidRegNO; preg < kMaxRegNum; preg++) { + if (lr.GetPregveto(preg)) { + LogInfo::MapleLogger() << preg << ","; + } + } + if (lr.IsSpilled()) { + LogInfo::MapleLogger() << " spilled"; + } + if (lr.GetSplitLr()) { + LogInfo::MapleLogger() << " split"; + } + LogInfo::MapleLogger() << "\n"; + 
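/*
 * Illustrative sketch (not part of the generated code; the helper name is
 * invented): live ranges are kept as bucketed uint64 bit vectors over basic
 * block ids (assuming kU64 is the 64-bit bucket width iterated above), and two
 * ranges interfere when those vectors intersect. A minimal version of that
 * test:
 *
 *   #include <cstdint>
 *   bool BBVectorsIntersect(const uint64_t *a, const uint64_t *b, uint32_t buckets) {
 *     for (uint32_t i = 0; i < buckets; ++i) {
 *       if ((a[i] & b[i]) != 0) {
 *         return true;
 *       }
 *     }
 *     return false;
 *   }
 */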
PrintLiveBBBit(lr); + PrintLiveRangeConflicts(lr); + PrintLiveUnitMap(lr); + if (lr.GetSplitLr()) { + PrintLiveRange(*lr.GetSplitLr(), "===>Split LR"); + } +} + +void GraphColorRegAllocator::PrintLiveRanges() const { + for (auto *lr : lrVec) { + if (lr == nullptr || lr->GetRegNO() == 0) { + continue; + } + PrintLiveRange(*lr, ""); + } + LogInfo::MapleLogger() << "\n"; +} + +void GraphColorRegAllocator::PrintLocalRAInfo(const std::string &str) const { + LogInfo::MapleLogger() << str << "\n"; + for (uint32 id = 0; id < cgFunc->NumBBs(); ++id) { + LocalRaInfo *lraInfo = localRegVec[id]; + if (lraInfo == nullptr) { + continue; + } + LogInfo::MapleLogger() << "bb " << id << " def "; + for (const auto &defCntPair : lraInfo->GetDefCnt()) { + LogInfo::MapleLogger() << "[" << defCntPair.first << ":" << defCntPair.second << "],"; + } + LogInfo::MapleLogger() << "\n"; + LogInfo::MapleLogger() << "use "; + for (const auto &useCntPair : lraInfo->GetUseCnt()) { + LogInfo::MapleLogger() << "[" << useCntPair.first << ":" << useCntPair.second << "],"; + } + LogInfo::MapleLogger() << "\n"; + } +} + +void GraphColorRegAllocator::PrintBBAssignInfo() const { + for (size_t id = 0; id < sortedBBs.size(); ++id) { + uint32 bbID = sortedBBs[id]->GetId(); + BBAssignInfo *bbInfo = bbRegInfo[bbID]; + if (bbInfo == nullptr) { + continue; + } + LogInfo::MapleLogger() << "BBinfo(" << id << ")"; + LogInfo::MapleLogger() << " lra-needed int " << bbInfo->GetIntLocalRegsNeeded(); + LogInfo::MapleLogger() << " fp " << bbInfo->GetFpLocalRegsNeeded(); + LogInfo::MapleLogger() << " greg-used "; + for (regno_t regNO = kInvalidRegNO; regNO < kMaxRegNum; ++regNO) { + if (bbInfo->GetGlobalsAssigned(regNO)) { + LogInfo::MapleLogger() << regNO << ","; + } + } + LogInfo::MapleLogger() << "\n"; + } +} + +void GraphColorRegAllocator::CalculatePriority(LiveRange &lr) const { +#ifdef RANDOM_PRIORITY + unsigned long seed = 0; + size_t size = sizeof(seed); + std::ifstream randomNum("/dev/random", std::ios::in | std::ios::binary); + if (randomNum) { + randomNum.read(reinterpret_cast(&seed), size); + if (randomNum) { + lr.SetPriority(1 / (seed + 1)); + } + randomNum.close(); + } else { + std::cerr << "Failed to open /dev/urandom" << '\n'; + } + return; +#endif /* RANDOM_PRIORITY */ + float pri = 0.0; + uint32 bbNum = 0; + auto calculatePriorityFunc = [&lr, &bbNum, &pri, this] (uint32 bbID) { + auto lu = lr.FindInLuMap(bbID); + ASSERT(lu != lr.EndOfLuMap(), "can not find live unit"); + BB *bb = bbVec[bbID]; + ++bbNum; + uint32 useCnt = lu->second->GetDefNum() + lu->second->GetUseNum(); + uint32 mult; +#ifdef USE_BB_FREQUENCY + mult = bb->GetFrequency(); +#else /* USE_BB_FREQUENCY */ + if (bb->GetLoop() != nullptr) { + mult = static_cast(pow(kLoopWeight, bb->GetLoop()->GetLoopLevel() * kAdjustWeight)); + } else { + mult = 1; + } +#endif /* USE_BB_FREQUENCY */ + pri += useCnt * mult; + }; + ForEachBBArrElem(lr.GetBBMember(), calculatePriorityFunc); + + if (bbNum != 0) { + lr.SetPriority(::log(pri) / bbNum); + } else { + lr.SetPriority(0.0); + } +} + +void GraphColorRegAllocator::PrintBBs() const { + for (auto *bb : sortedBBs) { + LogInfo::MapleLogger() << "\n< === > "; + LogInfo::MapleLogger() << bb->GetId(); + LogInfo::MapleLogger() << " succs:"; + for (auto *succBB : bb->GetSuccs()) { + LogInfo::MapleLogger() << " " << succBB->GetId(); + } + LogInfo::MapleLogger() << " eh_succs:"; + for (auto *succBB : bb->GetEhSuccs()) { + LogInfo::MapleLogger() << " " << succBB->GetId(); + } + } + LogInfo::MapleLogger() << "\n"; +} + +uint32 
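/*
 * Illustrative sketch (not part of the generated code; the struct and helper
 * are invented): CalculatePriority above weights each block's def/use count by
 * a loop factor, kLoopWeight raised to (loopLevel * kAdjustWeight), then takes
 * the log of the sum divided by the number of blocks spanned. A standalone
 * version assuming kLoopWeight = 10 and kAdjustWeight = 2:
 *
 *   #include <cmath>
 *   #include <vector>
 *   struct UnitStat { unsigned useCnt; unsigned loopLevel; };
 *   double RangePriority(const std::vector<UnitStat> &units) {
 *     double pri = 0.0;
 *     for (const UnitStat &u : units) {
 *       pri += u.useCnt * std::pow(10.0, u.loopLevel * 2.0);
 *     }
 *     return (units.empty() || pri <= 0.0) ? 0.0 : std::log(pri) / units.size();
 *   }
 */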
GraphColorRegAllocator::MaxIntPhysRegNum() const { + return (R28 - R0); +} + +uint32 GraphColorRegAllocator::MaxFloatPhysRegNum() const { + return (V31 - V0); +} + +bool GraphColorRegAllocator::IsReservedReg(AArch64reg regNO) const { + return (regNO == R16) || (regNO == R17); +} + +void GraphColorRegAllocator::InitFreeRegPool() { + /* + * ==== int regs ==== + * FP 29, LR 30, SP 31, 0 to 7 parameters + + * MapleCG defines 32 as ZR (zero register) + * use 8 if callee does not return large struct ? No + * 16 and 17 are intra-procedure call temp, can be caller saved + * 18 is platform reg, still use it + */ + uint32 intNum = 0; + uint32 fpNum = 0; + for (regno_t regNO = kRinvalid; regNO < kMaxRegNum; ++regNO) { + if (!AArch64Abi::IsAvailableReg(static_cast(regNO))) { + continue; + } + + /* + * Because of the try-catch scenario in JAVALANG, + * we should use specialized spill register to prevent register changes when exceptions occur. + */ + if (JAVALANG && AArch64Abi::IsSpillRegInRA(static_cast(regNO), needExtraSpillReg)) { + if (AArch64isa::IsGPRegister(static_cast(regNO))) { + /* Preset int spill registers */ + (void)intSpillRegSet.insert(regNO - R0); + } else { + /* Preset float spill registers */ + (void)fpSpillRegSet.insert(regNO - V0); + } + continue; + } + +#ifdef RESERVED_REGS + /* r16,r17 are used besides ra. */ + if (IsReservedReg(static_cast(regNO))) { + continue; + } +#endif /* RESERVED_REGS */ + + if (AArch64isa::IsGPRegister(static_cast(regNO))) { + /* when yieldpoint is enabled, x19 is reserved. */ + if (IsYieldPointReg(static_cast(regNO))) { + continue; + } + if (AArch64Abi::IsCalleeSavedReg(static_cast(regNO))) { + (void)intCalleeRegSet.insert(regNO - R0); + } else { + (void)intCallerRegSet.insert(regNO - R0); + } + ++intNum; + } else { + if (AArch64Abi::IsCalleeSavedReg(static_cast(regNO))) { + (void)fpCalleeRegSet.insert(regNO - V0); + } else { + (void)fpCallerRegSet.insert(regNO - V0); + } + ++fpNum; + } + } + intRegNum = intNum; + fpRegNum = fpNum; +} + +void GraphColorRegAllocator::InitCCReg() { + Operand &opnd = cgFunc->GetOrCreateRflag(); + auto &tmpRegOp = static_cast(opnd); + ccReg = tmpRegOp.GetRegisterNumber(); +} + +bool GraphColorRegAllocator::IsUnconcernedReg(regno_t regNO) const { + /* RFP = 30, RLR = 31, RSP = 32, RZR = 33 */ + if ((regNO >= RFP && regNO <= RZR) || regNO == ccReg) { + return true; + } + + /* when yieldpoint is enabled, the RYP(x19) can not be used. */ + if (IsYieldPointReg(static_cast(regNO))) { + return true; + } + + return false; +} + +bool GraphColorRegAllocator::IsUnconcernedReg(const RegOperand ®Opnd) const { + RegType regType = regOpnd.GetRegisterType(); + if (regType == kRegTyCc || regType == kRegTyVary) { + return true; + } + if (regOpnd.IsConstReg()) { + return true; + } + uint32 regNO = regOpnd.GetRegisterNumber(); + return IsUnconcernedReg(regNO); +} + +/* + * Based on live analysis, the live-in and live-out set determines + * the bit to be set in the LR vector, which is of size #BBs. + * If a vreg is in the live-in and live-out set, it is live in the BB. + * + * Also keep track if a LR crosses a call. If a LR crosses a call, it + * interferes with all caller saved registers. Add all caller registers + * to the LR's forbidden list. + * + * Return created LiveRange object + * + * maybe need extra info: + * Add info for setjmp. 
+ * Add info for defBB, useBB, index in BB for def and use + * Add info for startingBB and endingBB + */ +LiveRange *GraphColorRegAllocator::NewLiveRange() { + LiveRange *lr = cgFunc->GetMemoryPool()->New(alloc); + + if (bbBuckets == 0) { + bbBuckets = (cgFunc->NumBBs() / kU64) + 1; + } + lr->SetBBBuckets(bbBuckets); + lr->InitBBMember(*cgFunc->GetMemoryPool(), bbBuckets); + if (regBuckets == 0) { + regBuckets = (cgFunc->GetMaxRegNum() / kU64) + 1; + } + lr->SetRegBuckets(regBuckets); + lr->InitBBConflict(*cgFunc->GetMemoryPool(), regBuckets); + lr->InitPregveto(); + lr->InitForbidden(); + return lr; +} + +/* Create local info for LR. return true if reg is not local. */ +bool GraphColorRegAllocator::CreateLiveRangeHandleLocal(regno_t regNO, const BB &bb, bool isDef) { + if (FindIn(bb.GetLiveInRegNO(), regNO) || FindIn(bb.GetLiveOutRegNO(), regNO)) { + return true; + } + /* + * register not in globals for the bb, so it is local. + * Compute local RA info. + */ + LocalRaInfo *lraInfo = localRegVec[bb.GetId()]; + if (lraInfo == nullptr) { + lraInfo = cgFunc->GetMemoryPool()->New(alloc); + localRegVec[bb.GetId()] = lraInfo; + } + if (isDef) { + /* movk is handled by different id for use/def in the same insn. */ + lraInfo->SetDefCntElem(regNO, lraInfo->GetDefCntElem(regNO) + 1); + } else { + lraInfo->SetUseCntElem(regNO, lraInfo->GetUseCntElem(regNO) + 1); + } + /* lr info is useful for lra, so continue lr info */ + return false; +} + +LiveRange *GraphColorRegAllocator::CreateLiveRangeAllocateAndUpdate(regno_t regNO, const BB &bb, bool isDef, + uint32 currId) { + LiveRange *lr = nullptr; + if (lrVec[regNO] == nullptr) { + lr = NewLiveRange(); + lr->SetID(currId); + + LiveUnit *lu = cgFunc->GetMemoryPool()->New(); + lr->SetElemToLuMap(bb.GetId(), *lu); + lu->SetBegin(currId); + lu->SetEnd(currId); + if (isDef) { + /* means no use after def for reg, chances for ebo opt */ + for (const auto &pregNO : pregLive) { + lr->InsertElemToPregveto(pregNO); + } + } + } else { + lr = lrVec[regNO]; + + LiveUnit *lu = lr->GetLiveUnitFromLuMap(bb.GetId()); + if (lu == nullptr) { + lu = cgFunc->GetMemoryPool()->New(); + lr->SetElemToLuMap(bb.GetId(), *lu); + lu->SetBegin(currId); + lu->SetEnd(currId); + } + if (lu->GetBegin() > currId) { + lu->SetBegin(currId); + } + } + + return lr; +} + +bool GraphColorRegAllocator::CreateLiveRange(regno_t regNO, BB &bb, bool isDef, uint32 currId, bool updateCount) { + bool isNonLocal = CreateLiveRangeHandleLocal(regNO, bb, isDef); + + if (!isDef) { + --currId; + } + + LiveRange *lr = CreateLiveRangeAllocateAndUpdate(regNO, bb, isDef, currId); + lr->SetRegNO(regNO); + lr->SetIsNonLocal(isNonLocal); + if (isDef) { + (void)vregLive.erase(regNO); +#ifdef OPTIMIZE_FOR_PROLOG + if (updateCount) { + if (lr->GetNumDefs() == 0) { + lr->SetFrequency(lr->GetFrequency() + bb.GetFrequency()); + } + lr->IncNumDefs(); + } +#endif /* OPTIMIZE_FOR_PROLOG */ + } else { + (void)vregLive.insert(regNO); +#ifdef OPTIMIZE_FOR_PROLOG + if (updateCount) { + if (lr->GetNumUses() == 0) { + lr->SetFrequency(lr->GetFrequency() + bb.GetFrequency()); + } + lr->IncNumUses(); + } +#endif /* OPTIMIZE_FOR_PROLOG */ + } + for (const auto &pregNO : pregLive) { + lr->InsertElemToPregveto(pregNO); + } + + /* only handle it in live_in and def point? 
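 *
 * Illustrative sketch (not part of the generated code; the helper name is
 * invented, and the bucket width is assumed to be kU64 == 64): the
 * SetMemberBitArrElem call below records this block in the live range's
 * bucketed bit vector; the underlying bit manipulation is just:
 *
 *   #include <cstdint>
 *   void SetMemberBit(uint64_t *bbMember, uint32_t bbID) {
 *     bbMember[bbID / 64] |= (UINT64_C(1) << (bbID % 64));
 *   }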
*/ + uint32 bbID = bb.GetId(); + lr->SetMemberBitArrElem(bbID); + + lrVec[regNO] = lr; + + return true; +} + +bool GraphColorRegAllocator::SetupLiveRangeByOpHandlePhysicalReg(RegOperand ®Opnd, Insn &insn, regno_t regNO, + bool isDef) { + if (!regOpnd.IsPhysicalRegister()) { + return false; + } + LocalRaInfo *lraInfo = localRegVec[insn.GetBB()->GetId()]; + if (lraInfo == nullptr) { + lraInfo = cgFunc->GetMemoryPool()->New(alloc); + localRegVec[insn.GetBB()->GetId()] = lraInfo; + } + + if (isDef) { + if (FindNotIn(pregLive, regNO)) { + for (const auto &vRegNO : vregLive) { + if (IsUnconcernedReg(vRegNO)) { + continue; + } + lrVec[vRegNO]->InsertElemToPregveto(regNO); + } + } + pregLive.erase(regNO); + if (lraInfo != nullptr) { + lraInfo->SetDefCntElem(regNO, lraInfo->GetDefCntElem(regNO) + 1); + } + } else { + (void)pregLive.insert(regNO); + for (const auto &vregNO : vregLive) { + if (IsUnconcernedReg(vregNO)) { + continue; + } + LiveRange *lr = lrVec[vregNO]; + lr->InsertElemToPregveto(regNO); + } + + if (lraInfo != nullptr) { + lraInfo->SetUseCntElem(regNO, lraInfo->GetUseCntElem(regNO) + 1); + } + } + return true; +} + +/* + * add pregs to forbidden list of lr. If preg is in + * the live list, then it is forbidden for other vreg on the list. + */ +void GraphColorRegAllocator::SetupLiveRangeByOp(Operand &op, Insn &insn, bool isDef, uint32 &numUses) { + if (!op.IsRegister()) { + return; + } + auto ®Opnd = static_cast(op); + uint32 regNO = regOpnd.GetRegisterNumber(); + if (IsUnconcernedReg(regOpnd)) { + if (lrVec[regNO] != nullptr) { + ASSERT(false, "Unconcerned reg"); + lrVec[regNO] = nullptr; + } + return; + } + if (SetupLiveRangeByOpHandlePhysicalReg(regOpnd, insn, regNO, isDef)) { + return; + } + if (!CreateLiveRange(regNO, *insn.GetBB(), isDef, insn.GetId(), true)) { + return; + } + LiveRange *lr = lrVec[regNO]; + if (lr->GetRegType() == kRegTyUndef) { + lr->SetRegType(regOpnd.GetRegisterType()); + } + if (isDef) { + lr->GetLiveUnitFromLuMap(insn.GetBB()->GetId())->IncDefNum(); + } else { + lr->GetLiveUnitFromLuMap(insn.GetBB()->GetId())->IncUseNum(); + ++numUses; + } +#ifdef MOVE_COALESCE + if (insn.GetMachineOpcode() == MOP_xmovrr || insn.GetMachineOpcode() == MOP_wmovrr) { + RegOperand &opnd = static_cast(insn.GetOperand(1)); + if (opnd.GetRegisterNumber() < kNArmRegisters) { + lr->InsertElemToPrefs(opnd->GetRegisterNumber() - R0); + } + } +#endif /* MOVE_COALESCE */ +} + +/* handle live range for bb->live_out */ +void GraphColorRegAllocator::SetupLiveRangeByRegNO(regno_t liveOut, BB &bb, uint32 currPoint) { + if (IsUnconcernedReg(liveOut)) { + return; + } + if (liveOut >= kNArmRegisters) { + (void)vregLive.insert(liveOut); + CreateLiveRange(liveOut, bb, false, currPoint, false); + return; + } + + (void)pregLive.insert(liveOut); + for (const auto &vregNO : vregLive) { + LiveRange *lr = lrVec[vregNO]; + lr->InsertElemToPregveto(liveOut); + } + + /* See if phys reg is livein also. Then assume it span the entire bb. */ + if (!FindIn(bb.GetLiveInRegNO(), liveOut)) { + return; + } + LocalRaInfo *lraInfo = localRegVec[bb.GetId()]; + if (lraInfo == nullptr) { + lraInfo = cgFunc->GetMemoryPool()->New(alloc); + localRegVec[bb.GetId()] = lraInfo; + } + /* Make it a large enough so no locals can be allocated. 
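 *
 * Illustrative sketch (not part of the generated code; container choices and
 * names are invented): whenever a physical register becomes live, it is added
 * to the pregveto set of every currently-live virtual register, exactly as the
 * loops above do. Reduced to plain standard containers:
 *
 *   #include <map>
 *   #include <set>
 *   void VetoPhysReg(unsigned preg, const std::set<unsigned> &liveVregs,
 *                    std::map<unsigned, std::set<unsigned>> &pregveto) {
 *     for (unsigned vreg : liveVregs) {
 *       pregveto[vreg].insert(preg);
 *     }
 *   }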
*/ + lraInfo->SetUseCntElem(liveOut, kMaxUint16); +} + +void GraphColorRegAllocator::ClassifyOperand(std::unordered_set &pregs, std::unordered_set &vregs, + const Operand &opnd) { + if (!opnd.IsRegister()) { + return; + } + auto ®Opnd = static_cast(opnd); + regno_t regNO = regOpnd.GetRegisterNumber(); + if (regOpnd.IsPhysicalRegister()) { + (void)pregs.insert(regNO); + } else { + (void)vregs.insert(regNO); + } +} + +void GraphColorRegAllocator::SetOpndConflict(const Insn &insn, bool onlyDef) { + uint32 opndNum = insn.GetOperandSize(); + if (opndNum <= 1) { + return; + } + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn).GetMachineOpcode()]; + std::unordered_set pregs; + std::unordered_set vregs; + + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn.GetOperand(i); + if (!onlyDef) { + if (opnd.IsList()) { + auto &listOpnd = static_cast(opnd); + for (auto op : listOpnd.GetOperands()) { + ClassifyOperand(pregs, vregs, *op); + } + } else if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + Operand *base = memOpnd.GetBaseRegister(); + Operand *offset = memOpnd.GetIndexRegister(); + if (base != nullptr) { + ClassifyOperand(pregs, vregs, *base); + } + if (offset != nullptr) { + ClassifyOperand(pregs, vregs, *offset); + } + } else if (opnd.IsRegister()) { + ClassifyOperand(pregs, vregs, opnd); + } + } else { + if (md->GetOperand(i)->IsRegDef()) { + ClassifyOperand(pregs, vregs, opnd); + } + } + } + + if (vregs.empty()) { + return; + } + /* Set BBConflict and Pregveto */ + for (regno_t vregNO : vregs) { + for (regno_t conflictVregNO : vregs) { + if (conflictVregNO != vregNO) { + lrVec[vregNO]->SetConflictBitArrElem(conflictVregNO); + } + } + for (regno_t conflictPregNO : pregs) { + lrVec[vregNO]->InsertElemToPregveto(conflictPregNO); + } + } +} + +void GraphColorRegAllocator::UpdateOpndConflict(const Insn &insn, bool multiDef) { + /* if IsSpecialIntrinsic or IsAtomicStore, set conflicts for all opnds */ + if (insn.IsAtomicStore() || insn.IsSpecialIntrinsic()) { + SetOpndConflict(insn, false); + return; + } + if (multiDef) { + SetOpndConflict(insn, true); + } +} + +void GraphColorRegAllocator::ComputeLiveRangesForEachDefOperand(Insn &insn, bool &multiDef) { + uint32 numDefs = 0; + uint32 numUses = 0; + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn).GetMachineOpcode()]; + uint32 opndNum = insn.GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + if (!md->GetOperand(i)->IsRegDef()) { + continue; + } + Operand &opnd = insn.GetOperand(i); + SetupLiveRangeByOp(opnd, insn, true, numUses); + ++numDefs; + } + ASSERT(numUses == 0, "should only be def opnd"); + if (numDefs > 1) { + multiDef = true; + needExtraSpillReg = true; + } +} + +void GraphColorRegAllocator::ComputeLiveRangesForEachUseOperand(Insn &insn) { + uint32 numUses = 0; + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn).GetMachineOpcode()]; + uint32 opndNum = insn.GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + if (md->GetOperand(i)->IsRegDef() && !md->GetOperand(i)->IsRegUse()) { + continue; + } + Operand &opnd = insn.GetOperand(i); + if (opnd.IsList()) { + auto &listOpnd = static_cast(opnd); + for (auto op : listOpnd.GetOperands()) { + SetupLiveRangeByOp(*op, insn, false, numUses); + } + } else if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + Operand *base = memOpnd.GetBaseRegister(); + Operand *offset = memOpnd.GetIndexRegister(); + if (base != nullptr) { + SetupLiveRangeByOp(*base, insn, false, numUses); + } + if (offset != nullptr) { + 
SetupLiveRangeByOp(*offset, insn, false, numUses); + } + } else { + SetupLiveRangeByOp(opnd, insn, false, numUses); + } + } + if (numUses >= AArch64Abi::kNormalUseOperandNum || + static_cast(insn).GetMachineOpcode() == MOP_lazy_ldr) { + needExtraSpillReg = true; + } +} + +void GraphColorRegAllocator::ComputeLiveRangesUpdateIfInsnIsCall(const Insn &insn) { + if (!insn.IsCall()) { + return; + } + /* def the return value */ + pregLive.erase(R0); + pregLive.erase(V0); + + /* active the parametes */ + Operand &opnd1 = insn.GetOperand(1); + if (opnd1.IsList()) { + auto &srcOpnds = static_cast(opnd1); + for (auto regOpnd : srcOpnds.GetOperands()) { + ASSERT(!regOpnd->IsVirtualRegister(), "not be a virtual register"); + auto physicalReg = static_cast(regOpnd->GetRegisterNumber()); + (void)pregLive.insert(physicalReg); + } + } +} + +void GraphColorRegAllocator::ComputeLiveRangesUpdateLiveUnitInsnRange(BB &bb, uint32 currPoint) { + for (auto lin : bb.GetLiveInRegNO()) { + if (lin < kNArmRegisters) { + continue; + } + LiveRange *lr = lrVec[lin]; + if (lr == nullptr) { + continue; + } + auto lu = lr->FindInLuMap(bb.GetId()); + ASSERT(lu != lr->EndOfLuMap(), "container empty check"); + if (bb.GetFirstInsn()) { + lu->second->SetBegin(bb.GetFirstInsn()->GetId()); + } else { + /* since bb is empty, then use pointer as is */ + lu->second->SetBegin(currPoint); + } + lu->second->SetBegin(lu->second->GetBegin() - 1); + } +} + +bool GraphColorRegAllocator::UpdateInsnCntAndSkipUseless(Insn &insn, uint32 &currPoint) { + insn.SetId(currPoint); + if (insn.IsImmaterialInsn() || !insn.IsMachineInstruction()) { + --currPoint; + return true; + } + return false; +} + +void GraphColorRegAllocator::UpdateCallInfo(uint32 bbId) { + for (auto vregNO : vregLive) { + LiveRange *lr = lrVec[vregNO]; + lr->IncNumCall(); + + auto lu = lr->FindInLuMap(bbId); + if (lu != lr->EndOfLuMap()) { + lu->second->SetHasCall(true); + } + } +} + +void GraphColorRegAllocator::SetupMustAssignedLiveRanges(const Insn &insn) { + if (!insn.IsSpecialIntrinsic()) { + return; + } + uint32 opndNum = insn.GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn.GetOperand(i); + if (!opnd.IsRegister()) { + continue; + } + auto ®Opnd = static_cast(opnd); + regno_t regNO = regOpnd.GetRegisterNumber(); + LiveRange *lr = lrVec[regNO]; + if (lr != nullptr) { + lr->SetMustAssigned(); + lr->SetIsNonLocal(true); + } + } +} + +/* + * For each succ bb->GetSuccs(), if bb->liveout - succ->livein is not empty, the vreg(s) is + * dead on this path (but alive on the other path as there is some use of it on the + * other path). This might be useful for optimization of reload placement later for + * splits (lr split into lr1 & lr2 and lr2 will need to reload.) + * Not for now though. 
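+ *
+ * If that optimization is ever added, the per-edge dead set could be computed
+ * with a sketch along these lines (hypothetical code, assuming the live-in and
+ * live-out containers are ordered sets):
+ *   std::set<regno_t> deadOnEdge;
+ *   std::set_difference(bb->GetLiveOutRegNO().begin(), bb->GetLiveOutRegNO().end(),
+ *                       succ->GetLiveInRegNO().begin(), succ->GetLiveInRegNO().end(),
+ *                       std::inserter(deadOnEdge, deadOnEdge.end()));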
+ */ +void GraphColorRegAllocator::ComputeLiveRanges() { + bbVec.clear(); + bbVec.resize(cgFunc->NumBBs()); + + uint32 currPoint = cgFunc->GetTotalNumberOfInstructions() + sortedBBs.size(); + /* distinguish use/def */ + CHECK_FATAL(currPoint < (INT_MAX >> 2), "integer overflow check"); + currPoint = currPoint << 2; + for (size_t bbIdx = sortedBBs.size(); bbIdx > 0; --bbIdx) { + BB *bb = sortedBBs[bbIdx - 1]; + bbVec[bb->GetId()] = bb; + bb->SetLevel(bbIdx - 1); + + pregLive.clear(); + vregLive.clear(); + for (auto liveOut : bb->GetLiveOutRegNO()) { + SetupLiveRangeByRegNO(liveOut, *bb, currPoint); + } + --currPoint; + + if (bb->GetLastInsn() != nullptr && bb->GetLastInsn()->IsCall()) { + UpdateCallInfo(bb->GetId()); + } + + FOR_BB_INSNS_REV_SAFE(insn, bb, ninsn) { + if (UpdateInsnCntAndSkipUseless(*insn, currPoint)) { + if (ninsn != nullptr && ninsn->IsCall()) { + UpdateCallInfo(bb->GetId()); + } + continue; + } + + bool multiDef = false; + ComputeLiveRangesForEachDefOperand(*insn, multiDef); + ComputeLiveRangesForEachUseOperand(*insn); + + UpdateOpndConflict(*insn, multiDef); + SetupMustAssignedLiveRanges(*insn); + + if (ninsn != nullptr && ninsn->IsCall()) { + UpdateCallInfo(bb->GetId()); + } + + ComputeLiveRangesUpdateIfInsnIsCall(*insn); + /* distinguish use/def */ + currPoint -= 2; + } + ComputeLiveRangesUpdateLiveUnitInsnRange(*bb, currPoint); + /* move one more step for each BB */ + --currPoint; + } + + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "After ComputeLiveRanges\n"; + PrintLiveRanges(); +#ifdef USE_LRA + PrintLocalRAInfo("After ComputeLiveRanges"); +#endif /* USE_LRA */ + } +} + +/* Create a common stack space for spilling with need_spill */ +MemOperand *GraphColorRegAllocator::CreateSpillMem(uint32 spillIdx) { + if (spillIdx >= spillMemOpnds.size()) { + return nullptr; + } + + if (spillMemOpnds[spillIdx] == nullptr) { + regno_t reg = cgFunc->NewVReg(kRegTyInt, sizeof(int64)); + auto *a64CGFunc = static_cast(cgFunc); + spillMemOpnds[spillIdx] = a64CGFunc->GetOrCreatSpillMem(reg); + } + return spillMemOpnds[spillIdx]; +} + +bool GraphColorRegAllocator::IsLocalReg(regno_t regNO) const { + LiveRange *lr = lrVec[regNO]; + if (lr == nullptr) { + LogInfo::MapleLogger() << "unexpected regNO" << regNO; + return true; + } + return IsLocalReg(*lr); +} + +bool GraphColorRegAllocator::IsLocalReg(LiveRange &lr) const { + return !lr.GetSplitLr() && (lr.GetNumBBMembers() == 1) && !lr.IsNonLocal(); +} + +bool GraphColorRegAllocator::CheckOverlap(uint64 val, uint32 &lastBitSet, uint32 &overlapNum, uint32 i) const { + if (val == 0) { + return false; + } + for (uint32 x = 0; x < kU64; ++x) { + if ((val & (1ULL << x)) != 0) { + ++overlapNum; + lastBitSet = i * kU64 + x; + if (overlapNum > 1) { + return true; + } + } + } + return false; +} + +void GraphColorRegAllocator::CheckInterference(LiveRange &lr1, LiveRange &lr2) const { + uint64 bitArr[bbBuckets]; + for (uint32 i = 0; i < bbBuckets; ++i) { + bitArr[i] = lr1.GetBBMember()[i] & lr2.GetBBMember()[i]; + } + + uint32 lastBitSet = 0; + uint32 overlapNum = 0; + for (uint32 i = 0; i < bbBuckets; ++i) { + uint64 val = bitArr[i]; + if (CheckOverlap(val, lastBitSet, overlapNum, i)) { + break; + } + } + regno_t lr1RegNO = lr1.GetRegNO(); + regno_t lr2RegNO = lr2.GetRegNO(); + if (overlapNum == 1) { + /* + * begin and end should be in the bb info (LU) + * Need to rethink this if. + * Under some circumstance, lr->begin can occur after lr->end. 
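+ *
+ * For the single shared BB, the two LRs avoid a conflict only when their live
+ * units are disjoint intervals, roughly: [lu1.begin, lu1.end] and
+ * [lu2.begin, lu2.end] must not overlap. That is what the condition below tries
+ * to establish before skipping the conflict-bit update.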
+ */ + auto lu1 = lr1.FindInLuMap(lastBitSet); + auto lu2 = lr2.FindInLuMap(lastBitSet); + if (lu1 != lr1.EndOfLuMap() && lu2 != lr2.EndOfLuMap() && + !((lu1->second->GetBegin() < lu2->second->GetBegin() && lu1->second->GetEnd() < lu2->second->GetBegin()) || + (lu2->second->GetBegin() < lu1->second->GetEnd() && lu2->second->GetEnd() < lu1->second->GetBegin()))) { + lr1.SetConflictBitArrElem(lr2RegNO); + lr2.SetConflictBitArrElem(lr1RegNO); + } + } else if (overlapNum != 0) { + /* interfere */ + lr1.SetConflictBitArrElem(lr2RegNO); + lr2.SetConflictBitArrElem(lr1RegNO); + } +} + +void GraphColorRegAllocator::BuildInterferenceGraphSeparateIntFp(std::vector &intLrVec, + std::vector &fpLrVec) { + for (auto *lr : lrVec) { + if (lr == nullptr || lr->GetRegNO() == 0) { + continue; + } +#ifdef USE_LRA + if (IsLocalReg(*lr)) { + continue; + } +#endif /* USE_LRA */ + if (lr->GetRegType() == kRegTyInt) { + intLrVec.emplace_back(lr); + } else if (lr->GetRegType() == kRegTyFloat) { + fpLrVec.emplace_back(lr); + } else { + ASSERT(false, "Illegal regType in BuildInterferenceGraph"); + LogInfo::MapleLogger() << "error: Illegal regType in BuildInterferenceGraph\n"; + } + } +} + +/* + * Based on intersection of LRs. When two LRs interfere, add to each other's + * interference list. + */ +void GraphColorRegAllocator::BuildInterferenceGraph() { + std::vector intLrVec; + std::vector fpLrVec; + BuildInterferenceGraphSeparateIntFp(intLrVec, fpLrVec); + + for (auto it1 = intLrVec.begin(); it1 != intLrVec.end(); ++it1) { + LiveRange *lr1 = *it1; + CalculatePriority(*lr1); + for (auto it2 = it1 + 1; it2 != intLrVec.end(); ++it2) { + LiveRange *lr2 = *it2; + if (lr1->GetRegNO() < lr2->GetRegNO()) { + CheckInterference(*lr1, *lr2); + } + } + } + + for (auto it1 = fpLrVec.begin(); it1 != fpLrVec.end(); ++it1) { + LiveRange *lr1 = *it1; + CalculatePriority(*lr1); + for (auto it2 = it1 + 1; it2 != fpLrVec.end(); ++it2) { + LiveRange *lr2 = *it2; + if (lr1->GetRegNO() < lr2->GetRegNO()) { + CheckInterference(*lr1, *lr2); + } + } + } + + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "After BuildInterferenceGraph\n"; + PrintLiveRanges(); + } +} + +void GraphColorRegAllocator::SetBBInfoGlobalAssigned(uint32 bbID, regno_t regNO) { + ASSERT(bbID < bbRegInfo.size(), "index out of range in GraphColorRegAllocator::SetBBInfoGlobalAssigned"); + BBAssignInfo *bbInfo = bbRegInfo[bbID]; + if (bbInfo == nullptr) { + bbInfo = cgFunc->GetMemoryPool()->New(alloc); + bbRegInfo[bbID] = bbInfo; + bbInfo->InitGlobalAssigned(); + } + bbInfo->InsertElemToGlobalsAssigned(regNO); +} + +bool GraphColorRegAllocator::HaveAvailableColor(const LiveRange &lr, uint32 num) const { + return ((lr.GetRegType() == kRegTyInt && num < intRegNum) || (lr.GetRegType() == kRegTyFloat && num < fpRegNum)); +} + +/* + * If the members on the interference list is less than #colors, then + * it can be trivially assigned a register. Otherwise it is constrained. + * Separate the LR based on if it is contrained or not. + * + * The unconstrained LRs are colored last. + * + * Compute a sorted list of constrained LRs based on priority cost. 
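+ *
+ * The test itself is just a count against the number of allocatable colors
+ * (see HaveAvailableColor above), roughly:
+ *   constrained <=> #conflicts + #pregveto + #forbidden >= #colors for the reg type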
+ */ +void GraphColorRegAllocator::Separate() { + for (auto *lr : lrVec) { + if (lr == nullptr) { + continue; + } +#ifdef USE_LRA + if (IsLocalReg(*lr)) { + continue; + } +#endif /* USE_LRA */ +#ifdef OPTIMIZE_FOR_PROLOG + if (((lr->GetNumDefs() <= 1) && (lr->GetNumUses() <= 1) && (lr->GetNumCall() > 0)) && + (lr->GetFrequency() <= (cgFunc->GetFirstBB()->GetFrequency() << 1))) { + if (lr->GetRegType() == kRegTyInt) { + intDelayed.emplace_back(lr); + } else { + fpDelayed.emplace_back(lr); + } + continue; + } +#endif /* OPTIMIZE_FOR_PROLOG */ + if (HaveAvailableColor(*lr, lr->GetNumBBConflicts() + lr->GetPregvetoSize() + lr->GetForbiddenSize())) { + unconstrained.emplace_back(lr); + } else if (lr->IsMustAssigned()) { + mustAssigned.emplace_back(lr); + } else { + constrained.emplace_back(lr); + } + } + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "Unconstrained : "; + for (auto lr : unconstrained) { + LogInfo::MapleLogger() << lr->GetRegNO() << " "; + } + LogInfo::MapleLogger() << "\n"; + LogInfo::MapleLogger() << "Constrained : "; + for (auto lr : constrained) { + LogInfo::MapleLogger() << lr->GetRegNO() << " "; + } + LogInfo::MapleLogger() << "\n"; + LogInfo::MapleLogger() << "mustAssigned : "; + for (auto lr : mustAssigned) { + LogInfo::MapleLogger() << lr->GetRegNO() << " "; + } + LogInfo::MapleLogger() << "\n"; + } +} + +MapleVector::iterator GraphColorRegAllocator::GetHighPriorityLr(MapleVector &lrSet) const { + auto it = lrSet.begin(); + auto highestIt = it; + LiveRange *startLr = *it; + float maxPrio = startLr->GetPriority(); + ++it; + for (; it != lrSet.end(); ++it) { + LiveRange *lr = *it; + if (lr->GetPriority() > maxPrio) { + maxPrio = lr->GetPriority(); + highestIt = it; + } + } + return highestIt; +} + +void GraphColorRegAllocator::UpdateForbiddenForNeighbors(LiveRange &lr) const { + auto updateForbidden = [&lr, this] (regno_t regNO) { + LiveRange *newLr = lrVec[regNO]; + if (!newLr->GetPregveto(lr.GetAssignedRegNO())) { + newLr->InsertElemToForbidden(lr.GetAssignedRegNO()); + } + }; + ForEachRegArrElem(lr.GetBBConflict(), updateForbidden); +} + +void GraphColorRegAllocator::UpdatePregvetoForNeighbors(LiveRange &lr) const { + auto updatePregveto = [&lr, this] (regno_t regNO) { + LiveRange *newLr = lrVec[regNO]; + newLr->InsertElemToPregveto(lr.GetAssignedRegNO()); + newLr->EraseElemFromForbidden(lr.GetAssignedRegNO()); + }; + ForEachRegArrElem(lr.GetBBConflict(), updatePregveto); +} + +/* + * For cases with only one def/use and crosses a call. + * It might be more beneficial to spill vs save/restore in prolog/epilog. + * But if the callee register is already used, then it is ok to reuse it again. + * Or in certain cases, just use the callee. 
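+ *
+ * The intuition: a callee-saved reg costs one save/restore pair in the
+ * prolog/epilog, so for an LR with a single def/use that crosses a call it only
+ * pays off when that callee reg is already being saved anyway, or when several
+ * delayed vregs can share the cost; the heuristics below approximate this.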
+ */ +bool GraphColorRegAllocator::ShouldUseCallee(LiveRange &lr, const MapleSet &calleeUsed, + const MapleVector &delayed) const { + if (FindIn(calleeUsed, lr.GetAssignedRegNO())) { + return true; + } + if (AArch64Abi::IsCalleeSavedReg(static_cast(lr.GetAssignedRegNO())) && + (calleeUsed.size() % kDivide2) != 0) { + return true; + } + if (delayed.size() > 1 && calleeUsed.empty()) { + /* If there are more than 1 vreg that can benefit from callee, use callee */ + return true; + } + lr.SetAssignedRegNO(0); + return false; +} + +void GraphColorRegAllocator::AddCalleeUsed(regno_t regNO, RegType regType) { + ASSERT(AArch64isa::IsPhysicalRegister(regNO), "regNO should be physical register"); + bool isCalleeReg = AArch64Abi::IsCalleeSavedReg(static_cast(regNO)); + if (isCalleeReg) { + if (regType == kRegTyInt) { + (void)intCalleeUsed.insert(regNO); + } else { + (void)fpCalleeUsed.insert(regNO); + } + } +} + +regno_t GraphColorRegAllocator::FindColorForLr(const LiveRange &lr) const { + regno_t base; + RegType regType = lr.GetRegType(); + const MapleSet *currRegSet = nullptr; + const MapleSet *nextRegSet = nullptr; + if (regType == kRegTyInt) { + if (lr.GetNumCall() != 0) { + currRegSet = &intCalleeRegSet; + nextRegSet = &intCallerRegSet; + } else { + currRegSet = &intCallerRegSet; + nextRegSet = &intCalleeRegSet; + } + base = R0; + } else { + if (lr.GetNumCall() != 0) { + currRegSet = &fpCalleeRegSet; + nextRegSet = &fpCallerRegSet; + } else { + currRegSet = &fpCallerRegSet; + nextRegSet = &fpCalleeRegSet; + } + base = V0; + } + + regno_t reg; +#ifdef MOVE_COALESCE + for (const auto &it : lr.GetPrefs()) { + reg = it + base; + if ((FindIn(*currRegSet, reg) || FindIn(*nextRegSet, reg)) && !lr.GetForbidden(reg) && !lr.GetPregveto(reg)) { + return reg; + } + } +#endif /* MOVE_COALESCE */ + for (const auto &it : *currRegSet) { + reg = it + base; + if (!lr.GetForbidden(reg) && !lr.GetPregveto(reg)) { + return reg; + } + } + /* Failed to allocate in first choice. Try 2nd choice. */ + for (const auto &it : *nextRegSet) { + reg = it + base; + if (!lr.GetForbidden(reg) && !lr.GetPregveto(reg)) { + return reg; + } + } + ASSERT(false, "Failed to find a register"); + return 0; +} + +/* + * If forbidden list has more registers than max of all BB's local reg + * requirement, then LR can be colored. + * Update LR's color if success, return true, else return false. + */ +bool GraphColorRegAllocator::AssignColorToLr(LiveRange &lr, bool isDelayed) { + if (lr.GetAssignedRegNO() > 0) { + /* Already assigned. 
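+ * A register was picked for this LR on an earlier attempt; keep it as is.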
*/ + return true; + } + if (!HaveAvailableColor(lr, lr.GetForbiddenSize() + lr.GetPregvetoSize())) { + return false; + } + lr.SetAssignedRegNO(FindColorForLr(lr)); + if (lr.GetAssignedRegNO() == 0) { + return false; + } +#ifdef OPTIMIZE_FOR_PROLOG + if (isDelayed) { + if ((lr.GetRegType() == kRegTyInt && !ShouldUseCallee(lr, intCalleeUsed, intDelayed)) || + (lr.GetRegType() == kRegTyFloat && !ShouldUseCallee(lr, fpCalleeUsed, fpDelayed))) { + return false; + } + } +#endif /* OPTIMIZE_FOR_PROLOG */ + + AddCalleeUsed(lr.GetAssignedRegNO(), lr.GetRegType()); + + UpdateForbiddenForNeighbors(lr); + ForEachBBArrElem(lr.GetBBMember(), + [&lr, this](uint32 bbID) { SetBBInfoGlobalAssigned(bbID, lr.GetAssignedRegNO()); }); + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "assigned " << lr.GetAssignedRegNO() << " to R" << lr.GetRegNO() << "\n"; + } + return true; +} + +void GraphColorRegAllocator::PruneLrForSplit(LiveRange &lr, BB &bb, bool remove, + std::set &candidateInLoop, + std::set &defInLoop) { + if (bb.GetInternalFlag1()) { + /* already visited */ + return; + } + + bb.SetInternalFlag1(true); + auto lu = lr.FindInLuMap(bb.GetId()); + uint32 defNum = 0; + uint32 useNum = 0; + if (lu != lr.EndOfLuMap()) { + defNum = lu->second->GetDefNum(); + useNum = lu->second->GetUseNum(); + } + + if (remove) { + /* In removal mode, has not encountered a ref yet. */ + if (defNum == 0 && useNum == 0) { + if (bb.GetLoop() != nullptr && FindIn(candidateInLoop, bb.GetLoop())) { + /* + * Upward search has found a loop. Regardless of def/use + * The loop members must be included in the new LR. + */ + remove = false; + } else { + /* No ref in this bb. mark as potential remove. */ + bb.SetInternalFlag2(true); + return; + } + } else { + /* found a ref, no more removal of bb and preds. */ + remove = false; + } + } + + if (bb.GetLoop() != nullptr) { + /* With a def in loop, cannot prune that loop */ + if (defNum > 0) { + (void)defInLoop.insert(bb.GetLoop()); + } + /* bb in loop, need to make sure of loop carried dependency */ + (void)candidateInLoop.insert(bb.GetLoop()); + } + for (auto pred : bb.GetPreds()) { + if (FindNotIn(bb.GetLoopPreds(), pred)) { + PruneLrForSplit(lr, *pred, remove, candidateInLoop, defInLoop); + } + } + for (auto pred : bb.GetEhPreds()) { + if (FindNotIn(bb.GetLoopPreds(), pred)) { + PruneLrForSplit(lr, *pred, remove, candidateInLoop, defInLoop); + } + } +} + +void GraphColorRegAllocator::FindBBSharedInSplit(LiveRange &lr, std::set &candidateInLoop, + std::set &defInLoop) { + /* A loop might be split into two. Need to see over the entire LR if there is a def in the loop. */ + auto FindBBSharedFunc = [&lr, &candidateInLoop, &defInLoop, this](uint32 bbID) { + BB *bb = bbVec[bbID]; + if (bb->GetLoop() != nullptr && FindIn(candidateInLoop, bb->GetLoop())) { + auto lu = lr.FindInLuMap(bb->GetId()); + if (lu != lr.EndOfLuMap() && lu->second->GetDefNum() > 0) { + (void)defInLoop.insert(bb->GetLoop()); + } + } + }; + ForEachBBArrElem(lr.GetBBMember(), FindBBSharedFunc); +} + +/* + * Backward traversal of the top part of the split LR. + * Prune the part of the LR that has no downward exposing references. + * Take into account of loops and loop carried dependencies. + * The candidate bb to be removed, if in a loop, store that info. + * If a LR crosses a loop, even if the loop has no def/use, it must + * be included in the new LR. + */ +void GraphColorRegAllocator::ComputeBBForNewSplit(LiveRange &newLr, LiveRange &origLr) { + /* + * The candidate bb to be removed, if in a loop, store that info. 
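+ * (The candidateInLoop set declared just below collects those loops.)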
+ * If a LR crosses a loop, even if the loop has no def/use, it must + * be included in the new LR. + */ + std::set candidateInLoop; + /* If a bb has a def and is in a loop, store that info. */ + std::set defInLoop; + std::set smember; + ForEachBBArrElem(newLr.GetBBMember(), [this, &smember](uint32 bbID) { (void)smember.insert(bbVec[bbID]); }); + for (auto bbIt = smember.rbegin(); bbIt != smember.rend(); ++bbIt) { + BB *bb = *bbIt; + if (bb->GetInternalFlag1() != 0) { + continue; + } + PruneLrForSplit(newLr, *bb, true, candidateInLoop, defInLoop); + } + FindBBSharedInSplit(origLr, candidateInLoop, defInLoop); + auto pruneTopLr = [this, &newLr, &candidateInLoop, &defInLoop] (uint32 bbID) { + BB *bb = bbVec[bbID]; + if (bb->GetInternalFlag2() != 0) { + if (bb->GetLoop() != nullptr && FindIn(candidateInLoop, bb->GetLoop())) { + return; + } + if (bb->GetLoop() != nullptr || FindNotIn(defInLoop, bb->GetLoop())) { + /* defInLoop should be a subset of candidateInLoop. remove. */ + newLr.UnsetMemberBitArrElem(bbID); + } + } + }; + ForEachBBArrElem(newLr.GetBBMember(), pruneTopLr); /* prune the top LR. */ +} + +bool GraphColorRegAllocator::UseIsUncovered(BB &bb, const BB &startBB) { + for (auto pred : bb.GetPreds()) { + if (pred->GetLevel() <= startBB.GetLevel()) { + return true; + } + if (UseIsUncovered(*pred, startBB)) { + return true; + } + } + for (auto pred : bb.GetEhPreds()) { + if (pred->GetLevel() <= startBB.GetLevel()) { + return true; + } + if (UseIsUncovered(*pred, startBB)) { + return true; + } + } + return false; +} + +void GraphColorRegAllocator::FindUseForSplit(LiveRange &lr, SplitBBInfo &bbInfo, bool &remove, + std::set &candidateInLoop, + std::set &defInLoop) { + BB *bb = bbInfo.GetCandidateBB(); + const BB *startBB = bbInfo.GetStartBB(); + if (bb->GetInternalFlag1() != 0) { + /* already visited */ + return; + } + for (auto pred : bb->GetPreds()) { + if (pred->GetInternalFlag1() == 0) { + return; + } + } + for (auto pred : bb->GetEhPreds()) { + if (pred->GetInternalFlag1() == 0) { + return; + } + } + + bb->SetInternalFlag1(true); + auto lu = lr.FindInLuMap(bb->GetId()); + uint32 defNum = 0; + uint32 useNum = 0; + if (lu != lr.EndOfLuMap()) { + defNum = lu->second->GetDefNum(); + useNum = lu->second->GetUseNum(); + } + + if (remove) { + /* In removal mode, has not encountered a ref yet. */ + if (defNum == 0 && useNum == 0) { + /* No ref in this bb. mark as potential remove. */ + bb->SetInternalFlag2(true); + if (bb->GetLoop() != nullptr) { + /* bb in loop, need to make sure of loop carried dependency */ + (void)candidateInLoop.insert(bb->GetLoop()); + } + } else { + /* found a ref, no more removal of bb and preds. */ + remove = false; + /* A potential point for a upward exposing use. (might be a def). 
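+ * Marking the LU as needReload means the later rewrite is expected to insert
+ * a reload of the vreg in front of this reference.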
*/ + lu->second->SetNeedReload(true); + } + } else if ((defNum > 0 || useNum > 0) && UseIsUncovered(*bb, *startBB)) { + lu->second->SetNeedReload(true); + } + + /* With a def in loop, cannot prune that loop */ + if (bb->GetLoop() != nullptr && defNum > 0) { + (void)defInLoop.insert(bb->GetLoop()); + } + + for (auto succ : bb->GetSuccs()) { + if (FindNotIn(bb->GetLoopSuccs(), succ)) { + bbInfo.SetCandidateBB(*succ); + FindUseForSplit(lr, bbInfo, remove, candidateInLoop, defInLoop); + } + } + for (auto succ : bb->GetEhSuccs()) { + if (FindNotIn(bb->GetLoopSuccs(), succ)) { + bbInfo.SetCandidateBB(*succ); + FindUseForSplit(lr, bbInfo, remove, candidateInLoop, defInLoop); + } + } +} + +void GraphColorRegAllocator::ClearLrBBFlags(const std::set &member) { + for (auto bb : member) { + bb->SetInternalFlag1(0); + bb->SetInternalFlag2(0); + for (auto pred : bb->GetPreds()) { + pred->SetInternalFlag1(0); + pred->SetInternalFlag2(0); + } + for (auto pred : bb->GetEhPreds()) { + pred->SetInternalFlag1(0); + pred->SetInternalFlag2(0); + } + } +} + +/* + * Downward traversal of the bottom part of the split LR. + * Prune the part of the LR that has no upward exposing references. + * Take into account of loops and loop carried dependencies. + */ +void GraphColorRegAllocator::ComputeBBForOldSplit(LiveRange &newLr, LiveRange &origLr) { + /* The candidate bb to be removed, if in a loop, store that info. */ + std::set candidateInLoop; + /* If a bb has a def and is in a loop, store that info. */ + std::set defInLoop; + SplitBBInfo bbInfo; + bool remove = true; + + std::set smember; + ForEachBBArrElem(origLr.GetBBMember(), [this, &smember](uint32 bbID) { (void)smember.insert(bbVec[bbID]); }); + ClearLrBBFlags(smember); + for (auto bb : smember) { + if (bb->GetInternalFlag1() != 0) { + continue; + } + for (auto pred : bb->GetPreds()) { + pred->SetInternalFlag1(true); + } + for (auto pred : bb->GetEhPreds()) { + pred->SetInternalFlag1(true); + } + bbInfo.SetCandidateBB(*bb); + bbInfo.SetStartBB(*bb); + FindUseForSplit(origLr, bbInfo, remove, candidateInLoop, defInLoop); + } + FindBBSharedInSplit(newLr, candidateInLoop, defInLoop); + auto pruneLrFunc = [&origLr, &defInLoop, this](uint32 bbID) { + BB *bb = bbVec[bbID]; + if (bb->GetInternalFlag2() != 0) { + if (bb->GetLoop() != nullptr && FindNotIn(defInLoop, bb->GetLoop())) { + origLr.UnsetMemberBitArrElem(bbID); + } + } + }; + ForEachBBArrElem(origLr.GetBBMember(), pruneLrFunc); +} + +/* + * There is at least one available color for this BB from the neighbors + * minus the ones reserved for local allocation. + * bbAdded : The new BB to be added into the split LR if color is available. + * conflictRegs : Reprent the LR before adding the bbAdded. These are the + * forbidden regs before adding the new BBs. + * Side effect : Adding the new forbidden regs from bbAdded into + * conflictRegs if the LR can still be colored. + */ +bool GraphColorRegAllocator::LrCanBeColored(const LiveRange &lr, const BB &bbAdded, + std::unordered_set &conflictRegs) { + RegType type = lr.GetRegType(); + + std::unordered_set newConflict; + auto updateConflictFunc = [&bbAdded, &conflictRegs, &newConflict, &lr, this](regno_t regNO) { + /* check the real conflict in current bb */ + LiveRange *conflictLr = lrVec[regNO]; + /* + * If the bb to be added to the new LR has an actual + * conflict with another LR, and if that LR has already + * assigned a color that is not in the conflictRegs, + * then add it as a newConflict. 
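+ *
+ * After this walk, the colorability test below is essentially
+ *   newConflict.size() + pregvetoSize + conflictRegs.size() < #regs of this type.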
+ */ + if (IsBitArrElemSet(conflictLr->GetBBMember(), bbAdded.GetId())) { + regno_t confReg = conflictLr->GetAssignedRegNO(); + if ((confReg > 0) && FindNotIn(conflictRegs, confReg) && !lr.GetPregveto(confReg)) { + (void)newConflict.insert(confReg); + } + } else if (conflictLr->GetSplitLr() != nullptr && + IsBitArrElemSet(conflictLr->GetSplitLr()->GetBBMember(), bbAdded.GetId())) { + /* + * The after split LR is split into pieces, and this ensures + * the after split color is taken into consideration. + */ + regno_t confReg = conflictLr->GetSplitLr()->GetAssignedRegNO(); + if ((confReg > 0) && FindNotIn(conflictRegs, confReg) && !lr.GetPregveto(confReg)) { + (void)newConflict.insert(confReg); + } + } + }; + ForEachRegArrElem(lr.GetBBConflict(), updateConflictFunc); + + size_t numRegs = newConflict.size() + lr.GetPregvetoSize() + conflictRegs.size(); + + bool canColor = false; + if (type == kRegTyInt) { + if (numRegs < intRegNum) { + canColor = true; + } + } else if (numRegs < fpRegNum) { + canColor = true; + } + + if (canColor) { + for (auto regNO : newConflict) { + (void)conflictRegs.insert(regNO); + } + } + + /* Update all the registers conflicting when adding thew new bb. */ + return canColor; +} + +/* Support function for LR split. Move one BB from LR1 to LR2. */ +void GraphColorRegAllocator::MoveLrBBInfo(LiveRange &oldLr, LiveRange &newLr, BB &bb) { + /* initialize backward traversal flag for the bb pruning phase */ + bb.SetInternalFlag1(false); + /* initialize bb removal marker */ + bb.SetInternalFlag2(false); + /* Insert BB into new LR */ + uint32 bbID = bb.GetId(); + newLr.SetMemberBitArrElem(bbID); + + /* Move LU from old LR to new LR */ + auto luIt = oldLr.FindInLuMap(bb.GetId()); + if (luIt != oldLr.EndOfLuMap()) { + newLr.SetElemToLuMap(luIt->first, *(luIt->second)); + oldLr.EraseLuMap(luIt); + } + + /* Remove BB from old LR */ + oldLr.UnsetMemberBitArrElem(bbID); +} + +/* Is the set of loops inside the loop? 
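+ * More precisely: return true if `loop` appears on the outer-loop chain of any
+ * loop in `loops`, i.e. some member loop is nested (directly or transitively)
+ * inside `loop`, or is `loop` itself.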
*/ +bool GraphColorRegAllocator::ContainsLoop(const CGFuncLoops &loop, + const std::set &loops) const { + for (const CGFuncLoops *lp : loops) { + while (lp != nullptr) { + if (lp == &loop) { + return true; + } + lp = lp->GetOuterLoop(); + } + } + return false; +} + +void GraphColorRegAllocator::GetAllLrMemberLoops(LiveRange &lr, std::set &loops) { + auto GetLrMemberFunc = [&loops, this](uint32 bbID) { + BB *bb = bbVec[bbID]; + CGFuncLoops *loop = bb->GetLoop(); + if (loop != nullptr) { + (void)loops.insert(loop); + } + }; + ForEachBBArrElem(lr.GetBBMember(), GetLrMemberFunc); +} + +bool GraphColorRegAllocator::SplitLrShouldSplit(LiveRange &lr) { + if (lr.GetSplitLr() != nullptr || lr.GetNumBBMembers() == 1) { + return false; + } + + /* Need to split within the same hierarchy */ + uint32 loopID = 0xFFFFFFFF; /* loopID is initialized the maximum value,and then be assigned in function */ + bool needSplit = true; + auto setNeedSplit = [&needSplit, &loopID, this](uint32 bbID) -> bool { + BB *bb = bbVec[bbID]; + if (loopID == 0xFFFFFFFF) { + if (bb->GetLoop() != nullptr) { + loopID = static_cast(bb->GetLoop()->GetHeader()->GetId()); + } else { + loopID = 0; + } + } else if ((bb->GetLoop() != nullptr && bb->GetLoop()->GetHeader()->GetId() != loopID) || + (bb->GetLoop() == nullptr && loopID != 0)) { + needSplit = false; + return true; + } + return false; + }; + ForEachBBArrElemWithInterrupt(lr.GetBBMember(), setNeedSplit); + return needSplit; +} + +/* + * When a BB in the LR has no def or use in it, then potentially + * there is no conflict within these BB for the new LR, since + * the new LR will need to spill the defs which terminates the + * new LR unless there is a use later which extends the new LR. + * There is no need to compute conflicting register set unless + * there is a def or use. + * It is assumed that the new LR is extended to the def or use. + * Initially newLr is empty, then add bb if can be colored. + * Return true if there is a split. + */ +bool GraphColorRegAllocator::SplitLrFindCandidateLr(LiveRange &lr, LiveRange &newLr, + std::unordered_set &conflictRegs) { + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "start split lr for vreg " << lr.GetRegNO() << "\n"; + } + std::set smember; + ForEachBBArrElem(lr.GetBBMember(), [&smember, this](uint32 bbID) { (void)smember.insert(bbVec[bbID]); }); + for (auto bb : smember) { + if (!LrCanBeColored(lr, *bb, conflictRegs)) { + break; + } + MoveLrBBInfo(lr, newLr, *bb); + } + + /* return ture if split is successful */ + return newLr.GetNumBBMembers() != 0; +} + +void GraphColorRegAllocator::SplitLrHandleLoops(LiveRange &lr, LiveRange &newLr, + const std::set &origLoops, + const std::set &newLoops) { + /* + * bb in loops might need a reload due to loop carried dependency. + * Compute this before pruning the LRs. + * if there is no re-definition, then reload is not necessary. + * Part of the new LR region after the last reference is + * no longer in the LR. Remove those bb. + */ + ComputeBBForNewSplit(newLr, lr); + + /* With new LR, recompute conflict. */ + auto recomputeConflict = [&lr, &newLr, this](uint32 bbID) { + auto lrFunc = [&newLr, &bbID, this](regno_t regNO) { + LiveRange *confLrVec = lrVec[regNO]; + if (IsBitArrElemSet(confLrVec->GetBBMember(), bbID) || + (confLrVec->GetSplitLr() != nullptr && IsBitArrElemSet(confLrVec->GetSplitLr()->GetBBMember(), bbID))) { + /* + * New LR getting the interference does not mean the + * old LR can remove the interference. + * Old LR's interference will be handled at the end of split. 
+ */ + newLr.SetConflictBitArrElem(regNO); + } + }; + ForEachRegArrElem(lr.GetBBConflict(), lrFunc); + }; + ForEachBBArrElem(newLr.GetBBMember(), recomputeConflict); + + /* update bb/loop same as for new LR. */ + ComputeBBForOldSplit(newLr, lr); + /* Update the conflict interference for the original LR later. */ + for (auto loop : newLoops) { + if (!ContainsLoop(*loop, origLoops)) { + continue; + } + for (auto bb : loop->GetLoopMembers()) { + if (!IsBitArrElemSet(newLr.GetBBMember(), bb->GetId())) { + continue; + } + LiveUnit *lu = newLr.GetLiveUnitFromLuMap(bb->GetId()); + if (lu->GetUseNum() != 0) { + lu->SetNeedReload(true); + } + } + } +} + +void GraphColorRegAllocator::SplitLrFixNewLrCallsAndRlod(LiveRange &newLr, + const std::set &origLoops) { + /* If a 2nd split loop is before the bb in 1st split bb. */ + newLr.SetNumCall(0); + auto fixCallsAndRlod = [&newLr, &origLoops, this](uint32 bbID) { + BB *bb = bbVec[bbID]; + for (auto loop : origLoops) { + if (loop->GetHeader()->GetLevel() >= bb->GetLevel()) { + continue; + } + LiveUnit *lu = newLr.GetLiveUnitFromLuMap(bbID); + if (lu->GetUseNum() != 0) { + lu->SetNeedReload(true); + } + } + LiveUnit *lu = newLr.GetLiveUnitFromLuMap(bbID); + if (lu->HasCall()) { + newLr.IncNumCall(); + } + }; + ForEachBBArrElem(newLr.GetBBMember(), fixCallsAndRlod); +} + +void GraphColorRegAllocator::SplitLrFixOrigLrCalls(LiveRange &lr) { + lr.SetNumCall(0); + auto fixOrigCalls = [&lr](uint32 bbID) { + LiveUnit *lu = lr.GetLiveUnitFromLuMap(bbID); + if (lu->HasCall()) { + lr.IncNumCall(); + } + }; + ForEachBBArrElem(lr.GetBBMember(), fixOrigCalls); +} + +void GraphColorRegAllocator::SplitLrUpdateInterference(LiveRange &lr) { + /* + * newLr is now a separate LR from the original lr. + * Update the interference info. + * Also recompute the forbidden info + */ + lr.ClearForbidden(); + auto updateInterfrence = [&lr, this](regno_t regNO) { + LiveRange *confLrVec = lrVec[regNO]; + if (IsBBsetOverlap(lr.GetBBMember(), confLrVec->GetBBMember(), bbBuckets)) { + /* interfere */ + if (confLrVec->GetAssignedRegNO() && !lr.GetPregveto(confLrVec->GetAssignedRegNO())) { + lr.InsertElemToForbidden(confLrVec->GetAssignedRegNO()); + } + } else { + /* no interference */ + lr.UnsetConflictBitArrElem(regNO); + } + }; + ForEachRegArrElem(lr.GetBBConflict(), updateInterfrence); +} + +void GraphColorRegAllocator::SplitLrUpdateRegInfo(LiveRange &origLr, LiveRange &newLr, + std::unordered_set &conflictRegs) { + for (regno_t regNO = kInvalidRegNO; regNO < kMaxRegNum; ++regNO) { + if (origLr.GetPregveto(regNO)) { + newLr.InsertElemToPregveto(regNO); + } + } + for (auto regNO : conflictRegs) { + if (!newLr.GetPregveto(regNO)) { + newLr.InsertElemToForbidden(regNO); + } + } +} + +void GraphColorRegAllocator::SplitLrErrorCheckAndDebug(LiveRange &origLr) { + if (origLr.GetNumBBMembers() == 0) { + ASSERT(origLr.GetNumBBConflicts() == 0, "Error: member and conflict not match"); + } +} + +/* + * Pick a starting BB, then expand to maximize the new LR. + * Return the new LR. + */ +void GraphColorRegAllocator::SplitLr(LiveRange &lr) { + if (!SplitLrShouldSplit(lr)) { + return; + } + LiveRange *newLr = NewLiveRange(); + /* + * For the new LR, whenever a BB with either a def or + * use is added, then add the registers that the neighbor + * is using to the conflict register set indicating that these + * registers cannot be used for the new LR's color. 
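+ *
+ * Rough flow of the split below: carve off a colorable prefix of BBs into newLr,
+ * prune both halves against loop-carried defs/uses, recompute call counts and
+ * conflicts for each half, then color newLr right away while the original lr
+ * keeps whatever could not be colored.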
+ */ + std::unordered_set conflictRegs; + if (!SplitLrFindCandidateLr(lr, *newLr, conflictRegs)) { + return; + } +#ifdef REUSE_SPILLMEM + /* Copy the original conflict vector for spill reuse optimization */ + lr.SetOldConflict(cgFunc->GetMemoryPool()->NewArray(regBuckets)); + for (uint32 i = 0; i < regBuckets; ++i) { + lr.SetBBConflictElem(i, lr.GetBBConflictElem(i)); + } +#endif /* REUSE_SPILLMEM */ + + std::set newLoops; + std::set origLoops; + GetAllLrMemberLoops(*newLr, newLoops); + GetAllLrMemberLoops(lr, origLoops); + SplitLrHandleLoops(lr, *newLr, origLoops, newLoops); + SplitLrFixNewLrCallsAndRlod(*newLr, origLoops); + SplitLrFixOrigLrCalls(lr); + + SplitLrUpdateRegInfo(lr, *newLr, conflictRegs); + + CalculatePriority(lr); + /* At this point, newLr should be unconstrained. */ + lr.SetSplitLr(*newLr); + + newLr->SetRegNO(lr.GetRegNO()); + newLr->SetRegType(lr.GetRegType()); + newLr->SetID(lr.GetID()); + CalculatePriority(*newLr); + SplitLrUpdateInterference(lr); + newLr->SetAssignedRegNO(FindColorForLr(*newLr)); + + AddCalleeUsed(newLr->GetAssignedRegNO(), newLr->GetRegType()); + + /* For the new LR, update assignment for local RA */ + ForEachBBArrElem(newLr->GetBBMember(), + [&newLr, this](uint32 bbID) { SetBBInfoGlobalAssigned(bbID, newLr->GetAssignedRegNO()); }); + + UpdatePregvetoForNeighbors(*newLr); + + SplitLrErrorCheckAndDebug(lr); +} + +void GraphColorRegAllocator::ColorForOptPrologEpilog() { +#ifdef OPTIMIZE_FOR_PROLOG + for (auto lr : intDelayed) { + if (!AssignColorToLr(*lr, true)) { + lr->SetSpilled(true); + } + } + for (auto lr : fpDelayed) { + if (!AssignColorToLr(*lr, true)) { + lr->SetSpilled(true); + } + } +#endif +} + +/* + * From the sorted list of constrained LRs, pick the most profitable LR. + * Split the LR into LRnew1 LRnew2 where LRnew1 has the maximum number of + * BB and is colorable. + * The starting BB for traversal must have a color available. + * + * Assign a color, update neighbor's forbidden list. + * + * Update the conflict graph by change the interference list. + * In the case of both LRnew1 and LRnew2 conflicts with a BB, this BB's + * #neightbors increased. If this BB was unconstrained, must check if + * it is still unconstrained. Move to constrained if necessary. + * + * Color the unconstrained LRs. + */ +void GraphColorRegAllocator::SplitAndColorForEachLr(MapleVector &targetLrVec, bool isConstrained) { + while (!targetLrVec.empty()) { + auto highestIt = GetHighPriorityLr(targetLrVec); + LiveRange *lr = *highestIt; + /* check those lrs in lr->sconflict which is in unconstrained whether it turns to constrined */ + if (highestIt != targetLrVec.end()) { + targetLrVec.erase(highestIt); + } else { + ASSERT(false, "Error: not in targetLrVec"); + } + if (AssignColorToLr(*lr)) { + continue; + } + if (!isConstrained) { + ASSERT(false, "unconstrained lr should be colorable"); + LogInfo::MapleLogger() << "error: LR should be colorable " << lr->GetRegNO() << "\n"; + } +#ifdef USE_SPLIT + SplitLr(*lr); +#endif /* USE_SPLIT */ + /* + * When LR is spilled, it potentially has no conflicts as + * each def/use is spilled/reloaded. 
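+ * So a second AssignColorToLr attempt (under COLOR_SPLIT) may succeed; otherwise
+ * the LR is simply marked spilled and handled by the spill/reload rewrite later.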
+ */ +#ifdef COLOR_SPLIT + if (!AssignColorToLr(*lr)) { +#endif /* COLOR_SPLIT */ + lr->SetSpilled(true); +#ifdef COLOR_SPLIT + } +#endif /* COLOR_SPLIT */ + } +} + +void GraphColorRegAllocator::SplitAndColor() { + /* handle mustAssigned */ + SplitAndColorForEachLr(mustAssigned, true); + + /* handle constrained */ + SplitAndColorForEachLr(constrained, true); + + /* assign color for unconstained */ + SplitAndColorForEachLr(unconstrained, false); + +#ifdef OPTIMIZE_FOR_PROLOG + ColorForOptPrologEpilog(); +#endif /* OPTIMIZE_FOR_PROLOG */ +} + +void GraphColorRegAllocator::HandleLocalRegAssignment(regno_t regNO, LocalRegAllocator &localRa, bool isInt) { + /* vreg, get a reg for it if not assigned already. */ + if (!localRa.IsInRegAssigned(regNO, isInt) && !localRa.isInRegSpilled(regNO, isInt)) { + /* find an available phys reg */ + bool founded = false; + LiveRange *lr = lrVec[regNO]; + regno_t maxIntReg = R0 + MaxIntPhysRegNum(); + regno_t maxFpReg = V0 + MaxFloatPhysRegNum(); + regno_t startReg = isInt ? R0 : V0; + regno_t endReg = isInt ? maxIntReg : maxFpReg; + for (uint32 preg = startReg; preg <= endReg; ++preg) { + if (!localRa.IsPregAvailable(preg, isInt)) { + continue; + } + if (lr->GetNumCall() != 0 && !AArch64Abi::IsCalleeSavedReg(static_cast(preg))) { + continue; + } + if (lr->GetPregveto(preg)) { + continue; + } + regno_t assignedReg = preg; + localRa.ClearPregs(assignedReg, isInt); + localRa.SetPregUsed(assignedReg, isInt); + localRa.SetRegAssigned(regNO, isInt); + localRa.SetRegAssignmentMap(isInt, regNO, assignedReg); + founded = true; + break; + } + if (!founded) { + localRa.SetRegSpilled(regNO, isInt); + } + } +} + +void GraphColorRegAllocator::UpdateLocalRegDefUseCount(regno_t regNO, LocalRegAllocator &localRa, bool isDef, + bool isInt) const { + auto usedIt = localRa.GetUseInfo().find(regNO); + if (usedIt != localRa.GetUseInfo().end() && !isDef) { + /* reg use, decrement count */ + ASSERT(usedIt->second > 0, "Incorrect local ra info"); + localRa.SetUseInfoElem(regNO, usedIt->second - 1); + if (!AArch64isa::IsPhysicalRegister(static_cast(regNO)) && localRa.IsInRegAssigned(regNO, isInt)) { + localRa.IncUseInfoElem(localRa.GetRegAssignmentItem(isInt, regNO)); + } + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "\t\treg " << regNO << " update #use to " << localRa.GetUseInfoElem(regNO) << "\n"; + } + } + + auto defIt = localRa.GetDefInfo().find(regNO); + if (defIt != localRa.GetDefInfo().end() && isDef) { + /* reg def, decrement count */ + ASSERT(defIt->second > 0, "Incorrect local ra info"); + localRa.SetDefInfoElem(regNO, defIt->second - 1); + if (!AArch64isa::IsPhysicalRegister(static_cast(regNO)) && localRa.IsInRegAssigned(regNO, isInt)) { + localRa.IncDefInfoElem(localRa.GetRegAssignmentItem(isInt, regNO)); + } + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "\t\treg " << regNO << " update #def to " << localRa.GetDefInfoElem(regNO) << "\n"; + } + } +} + +void GraphColorRegAllocator::UpdateLocalRegConflict(regno_t regNO, LocalRegAllocator &localRa, bool isInt) { + LiveRange *lr = lrVec[regNO]; + if (lr->GetNumBBConflicts() == 0) { + return; + } + if (!localRa.IsInRegAssigned(regNO, isInt)) { + return; + } + regno_t preg = localRa.GetRegAssignmentItem(isInt, regNO); + ForEachRegArrElem(lr->GetBBConflict(), + [&preg, this](regno_t regNO) { lrVec[regNO]->InsertElemToPregveto(preg); }); +} + +void GraphColorRegAllocator::HandleLocalRaDebug(regno_t regNO, const LocalRegAllocator &localRa, bool isInt) const { + LogInfo::MapleLogger() << "HandleLocalReg " << regNO << "\n"; + 
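+ /* Below: regUsed is the set of pregs local RA has handed out so far, regs the set presumably still available; one bit per reg, counted from R0/V0. */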
LogInfo::MapleLogger() << "\tregUsed:"; + uint64 regUsed = localRa.GetPregUsed(isInt); + regno_t base = isInt ? R0 : V0; + regno_t end = isInt ? (RFP - R0) : (V31 - V0); + + for (uint32 i = 0; i <= end; ++i) { + if ((regUsed & (1ULL << i)) != 0) { + LogInfo::MapleLogger() << " " << (i + base); + } + } + LogInfo::MapleLogger() << "\n"; + LogInfo::MapleLogger() << "\tregs:"; + uint64 regs = localRa.GetPregs(isInt); + for (uint32 regnoInLoop = 0; regnoInLoop <= end; ++regnoInLoop) { + if ((regs & (1ULL << regnoInLoop)) != 0) { + LogInfo::MapleLogger() << " " << (regnoInLoop + base); + } + } + LogInfo::MapleLogger() << "\n"; +} + +void GraphColorRegAllocator::HandleLocalReg(Operand &op, LocalRegAllocator &localRa, const BBAssignInfo *bbInfo, + bool isDef, bool isInt) { + if (!op.IsRegister()) { + return; + } + auto ®Opnd = static_cast(op); + regno_t regNO = regOpnd.GetRegisterNumber(); + + if (IsUnconcernedReg(regOpnd)) { + return; + } + + /* is this a local register ? */ + if (regNO >= kNArmRegisters && !IsLocalReg(regNO)) { + return; + } + + if (GCRA_DUMP) { + HandleLocalRaDebug(regNO, localRa, isInt); + } + + if (regOpnd.IsPhysicalRegister()) { + /* conflict with preg is record in lr->pregveto and BBAssignInfo->globalsAssigned */ + UpdateLocalRegDefUseCount(regNO, localRa, isDef, isInt); + /* See if it is needed by global RA */ + if (localRa.GetUseInfoElem(regNO) == 0 && localRa.GetDefInfoElem(regNO) == 0) { + if (bbInfo && !bbInfo->GetGlobalsAssigned(regNO)) { + /* This phys reg is now available for assignment for a vreg */ + localRa.SetPregs(regNO, isInt); + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "\t\tlast ref, phys-reg " << regNO << " now available\n"; + } + } + } + } else { + HandleLocalRegAssignment(regNO, localRa, isInt); + UpdateLocalRegDefUseCount(regNO, localRa, isDef, isInt); + UpdateLocalRegConflict(regNO, localRa, isInt); + if (localRa.GetUseInfoElem(regNO) == 0 && localRa.GetDefInfoElem(regNO) == 0 && + localRa.IsInRegAssigned(regNO, isInt)) { + /* last ref of vreg, release assignment */ + localRa.SetPregs(localRa.GetRegAssignmentItem(isInt, regNO), isInt); + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "\t\tlast ref, release reg " << localRa.GetRegAssignmentItem(isInt, regNO) + << " for " << regNO << "\n"; + } + } + } +} + +void GraphColorRegAllocator::LocalRaRegSetEraseReg(LocalRegAllocator &localRa, regno_t regNO) { + bool isInt = AArch64isa::IsGPRegister(static_cast(regNO)); + if (localRa.IsPregAvailable(regNO, isInt)) { + localRa.ClearPregs(regNO, isInt); + } +} + +bool GraphColorRegAllocator::LocalRaInitRegSet(LocalRegAllocator &localRa, uint32 bbID) { + bool needLocalRa = false; + /* Note physical regs start from R0, V0. 
*/ + localRa.InitPregs(MaxIntPhysRegNum(), MaxFloatPhysRegNum(), cgFunc->GetCG()->GenYieldPoint(), intSpillRegSet, + fpSpillRegSet); + + localRa.ClearUseInfo(); + localRa.ClearDefInfo(); + LocalRaInfo *lraInfo = localRegVec[bbID]; + ASSERT(lraInfo != nullptr, "lraInfo not be nullptr"); + for (const auto &useCntPair : lraInfo->GetUseCnt()) { + regno_t regNO = useCntPair.first; + if (regNO >= kNArmRegisters) { + needLocalRa = true; + } + localRa.SetUseInfoElem(useCntPair.first, useCntPair.second); + } + for (const auto &defCntPair : lraInfo->GetDefCnt()) { + regno_t regNO = defCntPair.first; + if (regNO >= kNArmRegisters) { + needLocalRa = true; + } + localRa.SetDefInfoElem(defCntPair.first, defCntPair.second); + } + return needLocalRa; +} + +void GraphColorRegAllocator::LocalRaInitAllocatableRegs(LocalRegAllocator &localRa, uint32 bbID) { + BBAssignInfo *bbInfo = bbRegInfo[bbID]; + if (bbInfo != nullptr) { + for (regno_t regNO = kInvalidRegNO; regNO < kMaxRegNum; ++regNO) { + if (bbInfo->GetGlobalsAssigned(regNO)) { + LocalRaRegSetEraseReg(localRa, regNO); + } + } + } +} + +void GraphColorRegAllocator::LocalRaForEachDefOperand(const Insn &insn, LocalRegAllocator &localRa, + const BBAssignInfo *bbInfo) { + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn).GetMachineOpcode()]; + uint32 opndNum = insn.GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn.GetOperand(i); + /* handle def opnd */ + if (!md->GetOperand(i)->IsRegDef()) { + continue; + } + auto ®Opnd = static_cast(opnd); + bool isInt = (regOpnd.GetRegisterType() == kRegTyInt); + HandleLocalReg(opnd, localRa, bbInfo, true, isInt); + } +} + +void GraphColorRegAllocator::LocalRaForEachUseOperand(const Insn &insn, LocalRegAllocator &localRa, + const BBAssignInfo *bbInfo) { + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn).GetMachineOpcode()]; + uint32 opndNum = insn.GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn.GetOperand(i); + if (opnd.IsList()) { + continue; + } else if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + Operand *base = memOpnd.GetBaseRegister(); + Operand *offset = memOpnd.GetIndexRegister(); + if (base != nullptr) { + HandleLocalReg(*base, localRa, bbInfo, false, true); + } + if (offset != nullptr) { + HandleLocalReg(*offset, localRa, bbInfo, false, true); + } + } else if (md->GetOperand(i)->IsRegUse()) { + auto ®Opnd = static_cast(opnd); + bool isInt = (regOpnd.GetRegisterType() == kRegTyInt); + HandleLocalReg(opnd, localRa, bbInfo, false, isInt); + } + } +} + +void GraphColorRegAllocator::LocalRaPrepareBB(BB &bb, LocalRegAllocator &localRa) { + BBAssignInfo *bbInfo = bbRegInfo[bb.GetId()]; + FOR_BB_INSNS(insn, &bb) { + if (!insn->IsMachineInstruction()) { + continue; + } + + /* + * Use reverse operand order, assuming use first then def for allocation. + * need to free the use resource so it can be reused for def. + */ + LocalRaForEachUseOperand(*insn, localRa, bbInfo); + LocalRaForEachDefOperand(*insn, localRa, bbInfo); + } +} + +void GraphColorRegAllocator::LocalRaFinalAssignment(LocalRegAllocator &localRa, BBAssignInfo &bbInfo) { + for (const auto &intRegAssignmentMapPair : localRa.GetIntRegAssignmentMap()) { + regno_t regNO = intRegAssignmentMapPair.second; + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "[" << intRegAssignmentMapPair.first << "," << regNO << "],"; + } + /* Might need to get rid of this copy. 
*/ + bbInfo.SetRegMapElem(intRegAssignmentMapPair.first, regNO); + AddCalleeUsed(regNO, kRegTyInt); + } + for (const auto &fpRegAssignmentMapPair : localRa.GetFpRegAssignmentMap()) { + regno_t regNO = fpRegAssignmentMapPair.second; + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "[" << fpRegAssignmentMapPair.first << "," << regNO << "],"; + } + /* Might need to get rid of this copy. */ + bbInfo.SetRegMapElem(fpRegAssignmentMapPair.first, regNO); + AddCalleeUsed(regNO, kRegTyFloat); + } +} + +void GraphColorRegAllocator::LocalRaDebug(BB &bb, LocalRegAllocator &localRa) { + LogInfo::MapleLogger() << "bb " << bb.GetId() << " local ra INT need " << localRa.GetNumIntPregUsed() << " regs\n"; + LogInfo::MapleLogger() << "bb " << bb.GetId() << " local ra FP need " << localRa.GetNumFpPregUsed() << " regs\n"; + LogInfo::MapleLogger() << "\tpotential assignments:"; + for (auto it : localRa.GetIntRegAssignmentMap()) { + LogInfo::MapleLogger() << "[" << it.first << "," << it.second << "],"; + } + for (auto it : localRa.GetFpRegAssignmentMap()) { + LogInfo::MapleLogger() << "[" << it.first << "," << it.second << "],"; + } + LogInfo::MapleLogger() << "\n"; +} + +/* + * When do_allocate is false, it is prepass: + * Traverse each BB, keep track of the number of registers required + * for local registers in the BB. Communicate this to global RA. + * + * When do_allocate is true: + * Allocate local registers for each BB based on unused registers + * from global RA. Spill if no register available. + */ +void GraphColorRegAllocator::LocalRegisterAllocator(bool doAllocate) { + if (GCRA_DUMP) { + if (doAllocate) { + LogInfo::MapleLogger() << "LRA allocation start\n"; + PrintBBAssignInfo(); + } else { + LogInfo::MapleLogger() << "LRA preprocessing start\n"; + } + } + LocalRegAllocator *localRa = cgFunc->GetMemoryPool()->New(*cgFunc, alloc); + for (auto *bb : sortedBBs) { + uint32 bbID = bb->GetId(); + + LocalRaInfo *lraInfo = localRegVec[bb->GetId()]; + if (lraInfo == nullptr) { + /* No locals to allocate */ + continue; + } + + localRa->ClearLocalRaInfo(); + bool needLocalRa = LocalRaInitRegSet(*localRa, bbID); + if (!needLocalRa) { + /* Only physical regs in bb, no local ra needed. 
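+ * (LocalRaInitRegSet returns true only when it sees a virtual register number
+ * among the BB's def/use counts.)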
*/ + continue; + } + + if (doAllocate) { + LocalRaInitAllocatableRegs(*localRa, bbID); + } + + LocalRaPrepareBB(*bb, *localRa); + + BBAssignInfo *bbInfo = bbRegInfo[bb->GetId()]; + if (bbInfo == nullptr) { + bbInfo = cgFunc->GetMemoryPool()->New(alloc); + bbRegInfo[bbID] = bbInfo; + bbInfo->InitGlobalAssigned(); + } + bbInfo->SetIntLocalRegsNeeded(localRa->GetNumIntPregUsed()); + bbInfo->SetFpLocalRegsNeeded(localRa->GetNumFpPregUsed()); + + if (doAllocate) { + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "\tbb(" << bb->GetId() << ")final local ra assignments:"; + } + LocalRaFinalAssignment(*localRa, *bbInfo); + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "\n"; + } + } else if (GCRA_DUMP) { + LocalRaDebug(*bb, *localRa); + } + } +} + +MemOperand *GraphColorRegAllocator::GetConsistentReuseMem(const uint64 *conflict, + const std::set &usedMemOpnd, + uint32 size, RegType regType) { + std::set sconflict; + regno_t regNO; + for (uint32 i = 0; i < regBuckets; ++i) { + for (uint32 b = 0; b < kU64; ++b) { + if ((conflict[i] & (1ULL << b)) != 0) { + continue; + } + regNO = i * kU64 + b; + if (regNO >= numVregs) { + break; + } + if (lrVec[regNO] != nullptr) { + (void)sconflict.insert(lrVec[regNO]); + } + } + } + + for (auto *noConflictLr : sconflict) { + if (noConflictLr == nullptr || noConflictLr->GetRegType() != regType || noConflictLr->GetSpillSize() != size) { + continue; + } + if (usedMemOpnd.find(noConflictLr->GetSpillMem()) == usedMemOpnd.end()) { + return noConflictLr->GetSpillMem(); + } + } + return nullptr; +} + +MemOperand *GraphColorRegAllocator::GetCommonReuseMem(const uint64 *conflict, const std::set &usedMemOpnd, + uint32 size, RegType regType) { + regno_t regNO; + for (uint32 i = 0; i < regBuckets; ++i) { + for (uint32 b = 0; b < kU64; ++b) { + if ((conflict[i] & (1ULL << b)) != 0) { + continue; + } + regNO = i * kU64 + b; + if (regNO >= numVregs) { + break; + } + LiveRange *noConflictLr = lrVec[regNO]; + if (noConflictLr == nullptr || noConflictLr->GetRegType() != regType || noConflictLr->GetSpillSize() != size) { + continue; + } + if (usedMemOpnd.find(noConflictLr->GetSpillMem()) == usedMemOpnd.end()) { + return noConflictLr->GetSpillMem(); + } + } + } + return nullptr; +} + +/* See if any of the non-conflict LR is spilled and use its memOpnd. */ +MemOperand *GraphColorRegAllocator::GetReuseMem(uint32 vregNO, uint32 size, RegType regType) { + if (IsLocalReg(vregNO)) { + return nullptr; + } + + LiveRange *lr = lrVec[vregNO]; + const uint64 *conflict; + if (lr->GetSplitLr() != nullptr) { + /* + * For split LR, the vreg liveness is optimized, but for spill location + * the stack location needs to be maintained for the entire LR. + */ + conflict = lr->GetOldConflict(); + } else { + conflict = lr->GetBBConflict(); + } + + std::set usedMemOpnd; + auto updateMemOpnd = [&usedMemOpnd, this](regno_t regNO) { + LiveRange *lrInner = lrVec[regNO]; + if (lrInner->GetSpillMem() != nullptr) { + (void)usedMemOpnd.insert(lrInner->GetSpillMem()); + } + }; + ForEachRegArrElem(conflict, updateMemOpnd); + uint32 regSize = (size <= k32) ? k32 : k64; + /* + * This is to order the search so memOpnd given out is consistent. + * When vreg#s do not change going through VtableImpl.mpl file + * then this can be simplified. 
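+ *
+ * Either way the idea is the same: scan the vregs this LR does NOT conflict
+ * with, and if one of them was already spilled to a slot of the same size and
+ * reg type, reuse that stack slot instead of allocating a new one.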
+ */ +#ifdef CONSISTENT_MEMOPND + return GetConsistentReuseMem(conflict, usedMemOpnd, regSize, regType); +#else /* CONSISTENT_MEMOPND */ + return GetCommonReuseMem(conflict, usedMemOpnd, regSize, regType); +#endif /* CONSISTENT_MEMOPNDi */ +} + +MemOperand *GraphColorRegAllocator::GetSpillMem(uint32 vregNO, bool isDest, Insn &insn, AArch64reg regNO, + bool &isOutOfRange) { + auto *a64CGFunc = static_cast(cgFunc); + MemOperand *memOpnd = a64CGFunc->GetOrCreatSpillMem(vregNO); + return (a64CGFunc->AdjustMemOperandIfOffsetOutOfRange(memOpnd, vregNO, isDest, insn, regNO, isOutOfRange)); +} + +void GraphColorRegAllocator::SpillOperandForSpillPre(Insn &insn, const Operand &opnd, RegOperand &phyOpnd, + uint32 spillIdx, bool needSpill) { + if (!needSpill) { + return; + } + MemOperand *spillMem = CreateSpillMem(spillIdx); + ASSERT(spillMem != nullptr, "spillMem nullptr check"); + auto *a64CGFunc = static_cast(cgFunc); + CG *cg = a64CGFunc->GetCG(); + + auto ®Opnd = static_cast(opnd); + uint32 regNO = regOpnd.GetRegisterNumber(); + uint32 regSize = regOpnd.GetSize(); + PrimType stype; + RegType regType = regOpnd.GetRegisterType(); + if (regType == kRegTyInt) { + stype = (regSize <= k32) ? PTY_i32 : PTY_i64; + } else { + stype = (regSize <= k32) ? PTY_f32 : PTY_f64; + } + + if (a64CGFunc->IsImmediateOffsetOutOfRange(*static_cast(spillMem), k64)) { + regno_t pregNO = phyOpnd.GetRegisterNumber(); + spillMem = &a64CGFunc->SplitOffsetWithAddInstruction(*static_cast(spillMem), k64, + static_cast(pregNO), false, &insn); + } + Insn &stInsn = + cg->BuildInstruction(a64CGFunc->PickStInsn(spillMem->GetSize(), stype), phyOpnd, *spillMem); + std::string comment = " SPILL for spill vreg: " + std::to_string(regNO); + stInsn.SetComment(comment); + insn.GetBB()->InsertInsnBefore(insn, stInsn); +} + +void GraphColorRegAllocator::SpillOperandForSpillPost(Insn &insn, const Operand &opnd, RegOperand &phyOpnd, + uint32 spillIdx, bool needSpill) { + if (!needSpill) { + return; + } + + MemOperand *spillMem = CreateSpillMem(spillIdx); + ASSERT(spillMem != nullptr, "spillMem nullptr check"); + auto *a64CGFunc = static_cast(cgFunc); + CG *cg = a64CGFunc->GetCG(); + + auto ®Opnd = static_cast(opnd); + uint32 regNO = regOpnd.GetRegisterNumber(); + uint32 regSize = regOpnd.GetSize(); + PrimType stype; + RegType regType = regOpnd.GetRegisterType(); + if (regType == kRegTyInt) { + stype = (regSize <= k32) ? PTY_i32 : PTY_i64; + } else { + stype = (regSize <= k32) ? 
PTY_f32 : PTY_f64; + } + + bool isOutOfRange = false; + if (a64CGFunc->IsImmediateOffsetOutOfRange(*static_cast(spillMem), k64)) { + regno_t pregNO = phyOpnd.GetRegisterNumber(); + spillMem = &a64CGFunc->SplitOffsetWithAddInstruction(*static_cast(spillMem), k64, + static_cast(pregNO), true, &insn); + isOutOfRange = true; + } + Insn &ldrInsn = + cg->BuildInstruction(a64CGFunc->PickLdInsn(spillMem->GetSize(), stype), phyOpnd, *spillMem); + std::string comment = " RELOAD for spill vreg: " + std::to_string(regNO); + ldrInsn.SetComment(comment); + if (isOutOfRange) { + insn.GetBB()->InsertInsnAfter(*insn.GetNext(), ldrInsn); + } else { + insn.GetBB()->InsertInsnAfter(insn, ldrInsn); + } +} + +MemOperand *GraphColorRegAllocator::GetSpillOrReuseMem(LiveRange &lr, uint32 regSize, bool &isOutOfRange, Insn &insn, + bool isDef) { + (void)regSize; + MemOperand *memOpnd = nullptr; + if (lr.GetSpillMem() != nullptr) { + /* the saved memOpnd cannot be out-of-range */ + memOpnd = lr.GetSpillMem(); + } else { +#ifdef REUSE_SPILLMEM + memOpnd = GetReuseMem(lr.GetRegNO(), regSize, lr.GetRegType()); + if (memOpnd != nullptr) { + lr.SetSpillMem(*memOpnd); + lr.SetSpillSize((regSize <= k32) ? k32 : k64); + } else { +#endif /* REUSE_SPILLMEM */ + regno_t baseRegNO; + if (!isDef && lr.GetRegNO() == kRegTyInt) { + /* src will use its' spill reg as baseRegister when offset out-of-range + * add x16, x29, #max-offset //out-of-range + * ldr x16, [x16, #offset] //reload + * mov xd, x16 + */ + baseRegNO = lr.GetSpillReg(); + } else { + /* dest will use R17 as baseRegister when offset out-of-range + * mov x16, xs + * add x17, x29, #max-offset //out-of-range + * str x16, [x17, #offset] //spill + */ + baseRegNO = R17; + } + ASSERT(baseRegNO != kRinvalid, "invalid base register number"); + memOpnd = GetSpillMem(lr.GetRegNO(), isDef, insn, static_cast(baseRegNO), isOutOfRange); + /* dest's spill reg can only be R15 and R16 () */ + if (isOutOfRange && isDef) { + ASSERT(lr.GetSpillReg() != R17, "can not find valid memopnd's base register"); + } +#ifdef REUSE_SPILLMEM + if (isOutOfRange == 0) { + lr.SetSpillMem(*memOpnd); + lr.SetSpillSize((regSize <= k32) ? k32 : k64); + } + } +#endif /* REUSE_SPILLMEM */ + } + return memOpnd; +} + +/* + * Create spill insn for the operand. + * When need_spill is true, need to spill the spill operand register first + * then use it for the current spill, then reload it again. + */ +Insn *GraphColorRegAllocator::SpillOperand(Insn &insn, const Operand &opnd, bool isDef, RegOperand &phyOpnd) { + auto ®Opnd = static_cast(opnd); + uint32 regNO = regOpnd.GetRegisterNumber(); + uint32 pregNO = phyOpnd.GetRegisterNumber(); + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "SpillOperand " << regNO << "\n"; + } + + uint32 regSize = regOpnd.GetSize(); + bool isOutOfRange = false; + PrimType stype; + RegType regType = regOpnd.GetRegisterType(); + if (regType == kRegTyInt) { + stype = (regSize <= k32) ? PTY_i32 : PTY_i64; + } else { + stype = (regSize <= k32) ? 
PTY_f32 : PTY_f64; + } + auto *a64CGFunc = static_cast(cgFunc); + CG *cg = a64CGFunc->GetCG(); + + Insn *spillDefInsn = nullptr; + if (isDef) { + LiveRange *lr = lrVec[regNO]; + lr->SetSpillReg(pregNO); + MemOperand *memOpnd = GetSpillOrReuseMem(*lr, regSize, isOutOfRange, insn, true); + spillDefInsn = &cg->BuildInstruction(a64CGFunc->PickStInsn(regSize, stype), phyOpnd, *memOpnd); + std::string comment = " SPILL vreg:" + std::to_string(regNO); + spillDefInsn->SetComment(comment); + if (isOutOfRange || (insn.GetNext() && insn.GetNext()->GetMachineOpcode() == MOP_clinit_tail)) { + insn.GetBB()->InsertInsnAfter(*insn.GetNext(), *spillDefInsn); + } else { + insn.GetBB()->InsertInsnAfter(insn, *spillDefInsn); + } + if ((insn.GetMachineOpcode() != MOP_xmovkri16) && (insn.GetMachineOpcode() != MOP_wmovkri16)) { + return spillDefInsn; + } + } + if (insn.GetMachineOpcode() == MOP_clinit_tail) { + return nullptr; + } + LiveRange *lr = lrVec[regNO]; + lr->SetSpillReg(pregNO); + MemOperand *memOpnd = GetSpillOrReuseMem(*lr, regSize, isOutOfRange, insn, false); + Insn &spillUseInsn = cg->BuildInstruction(a64CGFunc->PickLdInsn(regSize, stype), phyOpnd, *memOpnd); + std::string comment = " RELOAD vreg:" + std::to_string(regNO); + spillUseInsn.SetComment(comment); + insn.GetBB()->InsertInsnBefore(insn, spillUseInsn); + if (spillDefInsn != nullptr) { + return spillDefInsn; + } + return &insn; +} + +/* Try to find available reg for spill. */ +bool GraphColorRegAllocator::SetAvailableSpillReg(std::unordered_set &cannotUseReg, LiveRange &lr, + uint64 &usedRegMask) { + bool isInt = (lr.GetRegType() == kRegTyInt); + regno_t base = isInt ? R0 : V0; + uint32 pregInterval = isInt ? 0 : (V0 - R30); + MapleSet &callerRegSet = isInt ? intCallerRegSet : fpCallerRegSet; + MapleSet &calleeRegSet = isInt ? intCalleeRegSet : fpCalleeRegSet; + + for (const auto &it : callerRegSet) { + regno_t spillReg = it + base; + if (cannotUseReg.find(spillReg) == cannotUseReg.end() && (usedRegMask & (1ULL << (spillReg - pregInterval))) == 0) { + lr.SetAssignedRegNO(spillReg); + usedRegMask |= 1ULL << (spillReg - pregInterval); + return true; + } + } + for (const auto &it : calleeRegSet) { + regno_t spillReg = it + base; + if (cannotUseReg.find(spillReg) == cannotUseReg.end() && (usedRegMask & (1ULL << (spillReg - pregInterval))) == 0) { + lr.SetAssignedRegNO(spillReg); + usedRegMask |= 1ULL << (spillReg - pregInterval); + return true; + } + } + return false; +} + +void GraphColorRegAllocator::CollectCannotUseReg(std::unordered_set &cannotUseReg, const LiveRange &lr, + Insn &insn) { + /* Find the bb in the conflict LR that actually conflicts with the current bb. 
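+   * Three sources feed cannotUseReg: registers veto'd for this live range
+   * (pregveto); registers already assigned to conflicting live ranges live in
+   * this bb, except caller-saved assignments of ranges that span a call, since
+   * a caller save will free that register anyway; and, under USE_LRA, registers
+   * handed out by local allocation in this bb.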
*/ + for (regno_t regNO = kRinvalid; regNO < kMaxRegNum; ++regNO) { + if (lr.GetPregveto(regNO)) { + (void)cannotUseReg.insert(regNO); + } + } + auto updateCannotUse = [&insn, &cannotUseReg, this](regno_t regNO) { + LiveRange *conflictLr = lrVec[regNO]; + /* + * conflictLr->GetAssignedRegNO() might be zero + * caller save will be inserted so the assigned reg can be released actually + */ + if ((conflictLr->GetAssignedRegNO() > 0) && IsBitArrElemSet(conflictLr->GetBBMember(), insn.GetBB()->GetId())) { + if (!AArch64Abi::IsCalleeSavedReg(static_cast(conflictLr->GetAssignedRegNO())) && + conflictLr->GetNumCall()) { + return; + } + (void)cannotUseReg.insert(conflictLr->GetAssignedRegNO()); + } + }; + ForEachRegArrElem(lr.GetBBConflict(), updateCannotUse); +#ifdef USE_LRA + BBAssignInfo *bbInfo = bbRegInfo[insn.GetBB()->GetId()]; + if (bbInfo != nullptr) { + for (const auto ®MapPair : bbInfo->GetRegMap()) { + (void)cannotUseReg.insert(regMapPair.second); + } + } +#endif /* USE_LRA */ +} + +regno_t GraphColorRegAllocator::PickRegForSpill(uint64 &usedRegMask, RegType regType, uint32 spillIdx, + bool &needSpillLr) { + regno_t base; + regno_t spillReg; + uint32 pregInterval; + bool isIntReg = (regType == kRegTyInt); + if (isIntReg) { + base = R0; + pregInterval = 0; + } else { + base = V0; + pregInterval = V0 - R30; + } + + if (JAVALANG) { + /* Use predetermined spill register */ + MapleSet &spillRegSet = isIntReg ? intSpillRegSet : fpSpillRegSet; + ASSERT(spillIdx < spillRegSet.size(), "spillIdx large than spillRegSet.size()"); + auto regNumIt = spillRegSet.begin(); + for (; spillIdx > 0; --spillIdx) { + ++regNumIt; + } + spillReg = *regNumIt + base; + return spillReg; + } + + /* Temporary find a unused reg to spill */ + uint32 maxPhysRegNum = isIntReg ? MaxIntPhysRegNum() : MaxFloatPhysRegNum(); + for (spillReg = (maxPhysRegNum + base); spillReg > base; --spillReg) { + if (spillReg >= k64BitSize) { + spillReg = k64BitSize - 1; + } + if ((usedRegMask & (1ULL << (spillReg - pregInterval))) == 0) { + usedRegMask |= (1ULL << (spillReg - pregInterval)); + needSpillLr = true; + return spillReg; + } + } + + ASSERT(false, "can not find spillReg"); + return 0; +} + +/* return true if need extra spill */ +bool GraphColorRegAllocator::SetRegForSpill(LiveRange &lr, Insn &insn, uint32 spillIdx, uint64 &usedRegMask, + bool isDef) { + std::unordered_set cannotUseReg; + /* SPILL COALESCE */ + if (!isDef && (insn.GetMachineOpcode() == MOP_xmovrr || insn.GetMachineOpcode() == MOP_wmovrr)) { + auto &ropnd = static_cast(insn.GetOperand(0)); + if (ropnd.IsPhysicalRegister()) { + lr.SetAssignedRegNO(ropnd.GetRegisterNumber()); + return false; + } + } + + CollectCannotUseReg(cannotUseReg, lr, insn); + + if (SetAvailableSpillReg(cannotUseReg, lr, usedRegMask)) { + return false; + } + + bool needSpillLr = false; + if (!lr.GetAssignedRegNO()) { + /* + * All regs are assigned and none are free. + * Pick a reg to spill and reuse for this spill. + * Need to make sure the reg picked is not assigned to this insn, + * else there will be conflict. 
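+     * PickRegForSpill below either takes the predetermined spill register
+     * (spillRegSet indexed by spillIdx, JAVALANG) or scans down from the
+     * highest physical register for one not yet in usedRegMask; in the latter
+     * case needSpillLr is set so the evicted value is saved before and
+     * reloaded after this use.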
+ */ + RegType regType = lr.GetRegType(); + regno_t spillReg = PickRegForSpill(usedRegMask, regType, spillIdx, needSpillLr); + if (static_cast(insn).GetMachineOpcode() == MOP_lazy_ldr && spillReg == R17) { + CHECK_FATAL(false, "register IP1(R17) may be changed when lazy_ldr"); + } + lr.SetAssignedRegNO(spillReg); + } + return needSpillLr; +} + +RegOperand *GraphColorRegAllocator::GetReplaceOpndForLRA(Insn &insn, const Operand &opnd, uint32 &spillIdx, + uint64 &usedRegMask, bool isDef) { + auto ®Opnd = static_cast(opnd); + uint32 vregNO = regOpnd.GetRegisterNumber(); + RegType regType = regOpnd.GetRegisterType(); + BBAssignInfo *bbInfo = bbRegInfo[insn.GetBB()->GetId()]; + if (bbInfo == nullptr) { + return nullptr; + } + auto regIt = bbInfo->GetRegMap().find(vregNO); + if (regIt != bbInfo->GetRegMap().end()) { + RegOperand &phyOpnd = static_cast(cgFunc)->GetOrCreatePhysicalRegisterOperand( + static_cast(regIt->second), regOpnd.GetSize(), regType); + return &phyOpnd; + } + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "spill vreg " << vregNO << "\n"; + } + regno_t spillReg; + bool needSpillLr = false; + if (insn.IsBranch() || insn.IsCall() || (insn.GetMachineOpcode() == MOP_clinit_tail) || + (insn.GetNext() && isDef && insn.GetNext()->GetMachineOpcode() == MOP_clinit_tail)) { + spillReg = R16; + } else { + /* + * use the reg that exclude livein/liveout/bbInfo->regMap + * Need to make sure the reg picked is not assigned to this insn, + * else there will be conflict. + */ + spillReg = PickRegForSpill(usedRegMask, regType, spillIdx, needSpillLr); + if (static_cast(insn).GetMachineOpcode() == MOP_lazy_ldr && spillReg == R17) { + CHECK_FATAL(false, "register IP1(R17) may be changed when lazy_ldr"); + } + AddCalleeUsed(spillReg, regType); + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "\tassigning lra spill reg " << spillReg << "\n"; + } + } + RegOperand &phyOpnd = static_cast(cgFunc)->GetOrCreatePhysicalRegisterOperand( + static_cast(spillReg), regOpnd.GetSize(), regType); + SpillOperandForSpillPre(insn, regOpnd, phyOpnd, spillIdx, needSpillLr); + Insn *spill = SpillOperand(insn, regOpnd, isDef, phyOpnd); + if (spill != nullptr) { + SpillOperandForSpillPost(*spill, regOpnd, phyOpnd, spillIdx, needSpillLr); + } + ++spillIdx; + return &phyOpnd; +} + +/* get spill reg and check if need extra spill */ +bool GraphColorRegAllocator::GetSpillReg(Insn &insn, LiveRange &lr, uint32 &spillIdx, uint64 &usedRegMask, bool isDef) { + bool needSpillLr = false; + /* + * Find a spill reg for the BB among interfereing LR. + * Without LRA, this info is very inaccurate. It will falsely interfere + * with all locals which the spill might not be interfering. + * For now, every instance of the spill requires a brand new reg assignment. + */ + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "LR-regNO " << lr.GetRegNO() << " spilled, finding a spill reg\n"; + } + if (insn.IsBranch() || insn.IsCall() || (insn.GetMachineOpcode() == MOP_clinit_tail) || + (insn.GetNext() && isDef && insn.GetNext()->GetMachineOpcode() == MOP_clinit_tail)) { + /* + * When a cond branch reg is spilled, it cannot + * restore the value after the branch since it can be the target from other br. + * Todo it properly, it will require creating a intermediate bb for the reload. + * Use x16, it is taken out from available since it is used as a global in the system. 
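+     * That is why the branch/call/clinit_tail case below pins the spill
+     * register to R16, while the ordinary case lets SetRegForSpill pick one and
+     * records the choice through AddCalleeUsed.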
+ */ + lr.SetAssignedRegNO(R16); + } else { + lr.SetAssignedRegNO(0); + needSpillLr = SetRegForSpill(lr, insn, spillIdx, usedRegMask, isDef); + AddCalleeUsed(lr.GetAssignedRegNO(), lr.GetRegType()); + } + return needSpillLr; +} + +RegOperand *GraphColorRegAllocator::GetReplaceOpnd(Insn &insn, const Operand &opnd, uint32 &spillIdx, + uint64 &usedRegMask, bool isDef) { + if (!opnd.IsRegister()) { + return nullptr; + } + auto ®Opnd = static_cast(opnd); + + uint32 vregNO = regOpnd.GetRegisterNumber(); + RegType regType = regOpnd.GetRegisterType(); + if (vregNO < kNArmRegisters) { + return nullptr; + } + if (IsUnconcernedReg(regOpnd)) { + return nullptr; + } + +#ifdef USE_LRA + if (IsLocalReg(vregNO)) { + return GetReplaceOpndForLRA(insn, opnd, spillIdx, usedRegMask, isDef); + } +#endif /* USE_LRA */ + + ASSERT(vregNO < lrVec.size(), "index out of range of MapleVector in GraphColorRegAllocator::GetReplaceOpnd"); + LiveRange *lr = lrVec[vregNO]; + + bool isSplitPart = false; + bool needSpillLr = false; + if (lr->GetSplitLr() && IsBitArrElemSet(lr->GetSplitLr()->GetBBMember(), insn.GetBB()->GetId())) { + isSplitPart = true; + } + + if (lr->IsSpilled() && !isSplitPart) { + needSpillLr = GetSpillReg(insn, *lr, spillIdx, usedRegMask, isDef); + } + + regno_t regNO; + if (isSplitPart) { + regNO = lr->GetSplitLr()->GetAssignedRegNO(); + } else { + regNO = lr->GetAssignedRegNO(); + } + bool isCalleeReg = AArch64Abi::IsCalleeSavedReg(static_cast(regNO)); + RegOperand &phyOpnd = static_cast(cgFunc)->GetOrCreatePhysicalRegisterOperand( + static_cast(regNO), opnd.GetSize(), regType); + if (GCRA_DUMP) { + LogInfo::MapleLogger() << "replace R" << vregNO << " with R" << (regNO - R0) << "\n"; + } + + insn.AppendComment(" [R" + std::to_string(vregNO) + "] "); + + if (isSplitPart && (isCalleeReg || lr->GetSplitLr()->GetNumCall() == 0)) { + if (isDef) { + SpillOperand(insn, opnd, isDef, phyOpnd); + ++spillIdx; + } else { + if (lr->GetSplitLr()->GetLiveUnitFromLuMap(insn.GetBB()->GetId())->NeedReload()) { + SpillOperand(insn, opnd, isDef, phyOpnd); + ++spillIdx; + } + } + return &phyOpnd; + } + + if (lr->IsSpilled() || (isSplitPart && (lr->GetSplitLr()->GetNumCall() != 0)) || (lr->GetNumCall() && !isCalleeReg) || + (!isSplitPart && !(lr->IsSpilled()) && lr->GetLiveUnitFromLuMap(insn.GetBB()->GetId())->NeedReload())) { + SpillOperandForSpillPre(insn, regOpnd, phyOpnd, spillIdx, needSpillLr); + Insn *spill = SpillOperand(insn, opnd, isDef, phyOpnd); + if (spill != nullptr) { + SpillOperandForSpillPost(*spill, regOpnd, phyOpnd, spillIdx, needSpillLr); + } + ++spillIdx; + } + + return &phyOpnd; +} + +void GraphColorRegAllocator::MarkUsedRegs(Operand &opnd, BBAssignInfo *bbInfo, uint64 &usedRegMask) { + auto ®Opnd = static_cast(opnd); + uint32 pregInterval = (regOpnd.GetRegisterType() == kRegTyInt) ? 
0 : (V0 - R30); + uint32 vregNO = regOpnd.GetRegisterNumber(); + LiveRange *lr = lrVec[vregNO]; + if (lr != nullptr) { + if (lr->GetAssignedRegNO() != 0) { + usedRegMask |= (1ULL << (lr->GetAssignedRegNO() - pregInterval)); + } + if (lr->GetSplitLr() && lr->GetSplitLr()->GetAssignedRegNO()) { + usedRegMask |= (1ULL << (lr->GetSplitLr()->GetAssignedRegNO() - pregInterval)); + } + } else if (bbInfo != nullptr && bbInfo->HasRegMap(vregNO)) { + usedRegMask |= (1ULL << (bbInfo->GetRegMapElem(vregNO) - pregInterval)); + } +} + +uint64 GraphColorRegAllocator::FinalizeRegisterPreprocess(BBAssignInfo *bbInfo, FinalizeRegisterInfo &fInfo, + Insn &insn) { + uint64 usedRegMask = 0; + const AArch64MD *md = &AArch64CG::kMd[static_cast(&insn)->GetMachineOpcode()]; + uint32 opndNum = insn.GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn.GetOperand(i); + ASSERT(md->GetOperand(i) != nullptr, "pointer is null in GraphColorRegAllocator::FinalizeRegisters"); + + if (opnd.IsList()) { + /* For arm32, not arm64 */ + } else if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + Operand *base = memOpnd.GetBaseRegister(); + if (base != nullptr) { + fInfo.SetBaseOperand(opnd, i); + MarkUsedRegs(*base, bbInfo, usedRegMask); + } + Operand *offset = memOpnd.GetIndexRegister(); + if (offset != nullptr) { + fInfo.SetOffsetOperand(opnd); + MarkUsedRegs(*offset, bbInfo, usedRegMask); + } + } else { + bool isDef = md->GetOperand(i)->IsRegDef(); + if (isDef) { + fInfo.SetDefOperand(opnd, i); + + /* + * Need to exclude def also, since it will clobber the result when the + * original value is reloaded. + */ + MarkUsedRegs(opnd, bbInfo, usedRegMask); + } else { + fInfo.SetUseOperand(opnd, i); + if (opnd.IsRegister()) { + MarkUsedRegs(opnd, bbInfo, usedRegMask); + } + } + } + } /* operand */ + return usedRegMask; +} + +/* Iterate through all instructions and change the vreg to preg. */ +void GraphColorRegAllocator::FinalizeRegisters() { + for (auto *bb : sortedBBs) { + BBAssignInfo *bbInfo = bbRegInfo[bb->GetId()]; + FOR_BB_INSNS(insn, bb) { + if (insn->IsImmaterialInsn()) { + continue; + } + if (!insn->IsMachineInstruction()) { + continue; + } + if (insn->GetId() == 0) { + continue; + } + + FinalizeRegisterInfo *fInfo = cgFunc->GetMemoryPool()->New(alloc); + uint64 usedRegMask = FinalizeRegisterPreprocess(bbInfo, *fInfo, *insn); + uint32 defSpillIdx = 0; + uint32 useSpillIdx = 0; + MemOperand *memOpnd = nullptr; + if (fInfo->GetBaseOperand()) { + memOpnd = static_cast( + static_cast(fInfo->GetBaseOperand())->Clone(*cgFunc->GetMemoryPool())); + insn->SetOperand(fInfo->GetMemOperandIdx(), *memOpnd); + Operand *base = memOpnd->GetBaseRegister(); + ASSERT(base != nullptr, "nullptr check"); + /* if base register is both defReg and useReg, defSpillIdx should also be increased. 
But it doesn't exist yet */ + RegOperand *phyOpnd = GetReplaceOpnd(*insn, *base, useSpillIdx, usedRegMask, false); + if (phyOpnd != nullptr) { + memOpnd->SetBaseRegister(*phyOpnd); + } + } + if (fInfo->GetOffsetOperand()) { + ASSERT(memOpnd != nullptr, "mem operand cannot be null"); + Operand *offset = memOpnd->GetIndexRegister(); + RegOperand *phyOpnd = GetReplaceOpnd(*insn, *offset, useSpillIdx, usedRegMask, false); + if (phyOpnd != nullptr) { + memOpnd->SetIndexRegister(*phyOpnd); + } + } + for (size_t i = 0; i < fInfo->GetDefOperandsSize(); ++i) { + const Operand *opnd = fInfo->GetDefOperandsElem(i); + RegOperand *phyOpnd = nullptr; + if (insn->IsSpecialIntrinsic()) { + phyOpnd = GetReplaceOpnd(*insn, *opnd, useSpillIdx, usedRegMask, true); + } else { + phyOpnd = GetReplaceOpnd(*insn, *opnd, defSpillIdx, usedRegMask, true); + } + if (phyOpnd != nullptr) { + insn->SetOperand(fInfo->GetDefIdxElem(i), *phyOpnd); + } + } + for (size_t i = 0; i < fInfo->GetUseOperandsSize(); ++i) { + const Operand *opnd = fInfo->GetUseOperandsElem(i); + RegOperand *phyOpnd = GetReplaceOpnd(*insn, *opnd, useSpillIdx, usedRegMask, false); + if (phyOpnd != nullptr) { + insn->SetOperand(fInfo->GetUseIdxElem(i), *phyOpnd); + } + } + } /* insn */ + } /* BB */ +} + +void GraphColorRegAllocator::MarkCalleeSaveRegs() { + for (auto regNO : intCalleeUsed) { + static_cast(cgFunc)->AddtoCalleeSaved(static_cast(regNO)); + } + for (auto regNO : fpCalleeUsed) { + static_cast(cgFunc)->AddtoCalleeSaved(static_cast(regNO)); + } +} + +bool GraphColorRegAllocator::AllocateRegisters() { +#ifdef RANDOM_PRIORITY + /* Change this seed for different random numbers */ + srand(0); +#endif /* RANDOM_PRIORITY */ + auto *a64CGFunc = static_cast(cgFunc); + + /* + * we store both FP/LR if using FP or if not using FP, but func has a call + * Using FP, record it for saving + */ + a64CGFunc->AddtoCalleeSaved(RFP); + a64CGFunc->AddtoCalleeSaved(RLR); + a64CGFunc->NoteFPLRAddedToCalleeSavedList(); + +#if DEBUG + int32 cnt = 0; + FOR_ALL_BB(bb, cgFunc) { + FOR_BB_INSNS(insn, bb) { + ++cnt; + } + } + ASSERT(cnt <= cgFunc->GetTotalNumberOfInstructions(), "Incorrect insn count"); +#endif + + cgFunc->SetIsAfterRegAlloc(); + /* EBO propgation extent the live range and might need to be turned off. */ + ComputeBlockOrder(); + + ComputeLiveRanges(); + + InitFreeRegPool(); + + InitCCReg(); + + BuildInterferenceGraph(); + + Separate(); + + SplitAndColor(); + +#ifdef USE_LRA + LocalRegisterAllocator(true); +#endif /* USE_LRA */ + + FinalizeRegisters(); + + MarkCalleeSaveRegs(); + + if (GCRA_DUMP) { + cgFunc->DumpCGIR(); + } + + return true; +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_dependence.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_dependence.cpp new file mode 100644 index 0000000000000000000000000000000000000000..04c91d3ebb46914921f1c07d003a3b1ee3e168c7 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_dependence.cpp @@ -0,0 +1,1083 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. 
+ * See the Mulan PSL v2 for more details. + */ +#include "riscv64_dependence.h" +#include "riscv64_cg.h" +#include "riscv64_operand.h" +#include "pressure.h" + +/* For building dependence graph, The entry is AArch64DepAnalysis::Run. */ +namespace maplebe { +/* constructor */ +AArch64DepAnalysis::AArch64DepAnalysis(CGFunc &func, MemPool &mp, MAD &mad, bool beforeRA) + : DepAnalysis(func, mp, mad, beforeRA), stackUses(alloc.Adapter()), + stackDefs(alloc.Adapter()), heapUses(alloc.Adapter()), + heapDefs(alloc.Adapter()), mayThrows(alloc.Adapter()), + ambiInsns(alloc.Adapter()), ehInRegs(alloc.Adapter()) { + uint32 maxRegNum; + if (beforeRA) { + maxRegNum = cgFunc.GetMaxVReg(); + } else { + maxRegNum = kAllRegNum; + } + regDefs = memPool.NewArray(maxRegNum); + regUses = memPool.NewArray(maxRegNum); +} + +/* print dep node information */ +void AArch64DepAnalysis::DumpDepNode(DepNode &node) const { + node.GetInsn()->Dump(); + uint32 num = node.GetUnitNum(); + LogInfo::MapleLogger() << "unit num : " << num << ", "; + for (uint32 i = 0; i < num; ++i) { + const Unit *unit = node.GetUnitByIndex(i); + if (unit != nullptr) { + PRINT_VAL(unit->GetName()); + } else { + PRINT_VAL("none"); + } + } + LogInfo::MapleLogger() << '\n'; + node.DumpSchedInfo(); + if (beforeRA) { + node.DumpRegPressure(); + } +} + +/* print dep link information */ +void AArch64DepAnalysis::DumpDepLink(DepLink &link, const DepNode *node) const { + PRINT_VAL(GetDepTypeName(link.GetDepType())); + PRINT_STR_VAL("Latency: ", link.GetLatency()); + if (node != nullptr) { + node->GetInsn()->Dump(); + return; + } + LogInfo::MapleLogger() << "from : "; + link.GetFrom().GetInsn()->Dump(); + LogInfo::MapleLogger() << "to : "; + link.GetTo().GetInsn()->Dump(); +} + +/* Append use register to the list. */ +void AArch64DepAnalysis::AppendRegUseList(Insn &insn, regno_t regNO) { + RegList *regList = memPool.New(); + regList->insn = &insn; + regList->next = nullptr; + if (regUses[regNO] == nullptr) { + regUses[regNO] = regList; + if (beforeRA) { + Insn *defInsn = regDefs[regNO]; + if (defInsn == nullptr) { + return; + } + DepNode *defNode = defInsn->GetDepNode(); + defNode->SetRegDefs(regNO, regList); + } + return; + } + RegList *lastRegList = regUses[regNO]; + while (lastRegList->next != nullptr) { + lastRegList = lastRegList->next; + } + lastRegList->next = regList; +} + +/* + * Add dependence edge. + * Two dependence node has a unique edge. + * True dependence overwirtes other dependences. + */ +void AArch64DepAnalysis::AddDependence(DepNode &fromNode, DepNode &toNode, DepType depType) { + /* Can not build a self loop dependence. */ + if (&fromNode == &toNode) { + return; + } + /* Check if exist edge. */ + if (!fromNode.GetSuccs().empty()) { + DepLink *depLink = fromNode.GetSuccs().back(); + if (&(depLink->GetTo()) == &toNode) { + if (depLink->GetDepType() != kDependenceTypeTrue) { + if (depType == kDependenceTypeTrue) { + /* Has exist edge, replace it. 
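+           * Only the most recent successor edge is inspected (two nodes keep at
+           * most one edge); a weaker dependence is upgraded in place and its
+           * latency refreshed from the machine model.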
*/ + depLink->SetDepType(kDependenceTypeTrue); + depLink->SetLatency(mad.GetLatency(*fromNode.GetInsn(), *toNode.GetInsn())); + } + } + return; + } + } + DepLink *depLink = memPool.New(fromNode, toNode, depType); + if (depType == kDependenceTypeTrue) { + depLink->SetLatency(mad.GetLatency(*fromNode.GetInsn(), *toNode.GetInsn())); + } + fromNode.AddSucc(*depLink); + toNode.AddPred(*depLink); +} + +void AArch64DepAnalysis::AddDependence4InsnInVectorByType(MapleVector &insns, Insn &insn, const DepType &type) { + for (auto anyInsn : insns) { + AddDependence(*anyInsn->GetDepNode(), *insn.GetDepNode(), type); + } +} + +void AArch64DepAnalysis::AddDependence4InsnInVectorByTypeAndCmp(MapleVector &insns, Insn &insn, + const DepType &type) { + for (auto anyInsn : insns) { + if (anyInsn != &insn) { + AddDependence(*anyInsn->GetDepNode(), *insn.GetDepNode(), type); + } + } +} + +/* Remove self dependence (self loop) in dependence graph. */ +void AArch64DepAnalysis::RemoveSelfDeps(Insn &insn) { + DepNode *node = insn.GetDepNode(); + ASSERT(node->GetSuccs().back()->GetTo().GetInsn() == &insn, "Is not a self dependence."); + ASSERT(node->GetPreds().back()->GetFrom().GetInsn() == &insn, "Is not a self dependence."); + node->RemoveSucc(); + node->RemovePred(); +} + +/* Build dependences of source register operand. */ +void AArch64DepAnalysis::BuildDepsUseReg(Insn &insn, regno_t regNO) { + DepNode *node = insn.GetDepNode(); + node->AddUseReg(regNO); + if (regDefs[regNO] != nullptr) { + /* Build true dependences. */ + AddDependence(*regDefs[regNO]->GetDepNode(), *insn.GetDepNode(), kDependenceTypeTrue); + } +} + +/* Build dependences of destination register operand. */ +void AArch64DepAnalysis::BuildDepsDefReg(Insn &insn, regno_t regNO) { + DepNode *node = insn.GetDepNode(); + node->AddDefReg(regNO); + /* Build anti dependences. */ + RegList *regList = regUses[regNO]; + while (regList != nullptr) { + CHECK_NULL_FATAL(regList->insn); + AddDependence(*regList->insn->GetDepNode(), *node, kDependenceTypeAnti); + regList = regList->next; + } + /* Build output depnedence. */ + if (regDefs[regNO] != nullptr) { + AddDependence(*regDefs[regNO]->GetDepNode(), *node, kDependenceTypeOutput); + } +} + +void AArch64DepAnalysis::ReplaceDepNodeWithNewInsn(DepNode &firstNode, DepNode &secondNode, Insn& newInsn, + bool isFromClinit) const { + if (isFromClinit) { + firstNode.AddClinitInsn(*firstNode.GetInsn()); + firstNode.AddClinitInsn(*secondNode.GetInsn()); + firstNode.SetCfiInsns(secondNode.GetCfiInsns()); + } else { + for (Insn *insn : secondNode.GetCfiInsns()) { + firstNode.AddCfiInsn(*insn); + } + for (Insn *insn : secondNode.GetComments()) { + firstNode.AddComments(*insn); + } + secondNode.ClearComments(); + } + firstNode.SetInsn(newInsn); + Reservation *rev = mad.FindReservation(newInsn); + CHECK_FATAL(rev != nullptr, "reservation is nullptr."); + firstNode.SetReservation(*rev); + firstNode.SetUnits(rev->GetUnit()); + firstNode.SetUnitNum(rev->GetUnitNum()); + newInsn.SetDepNode(firstNode); +} + +void AArch64DepAnalysis::ClearDepNodeInfo(DepNode &depNode) const { + Insn &insn = cgFunc.GetCG()->BuildInstruction(MOP_pseudo_none); + insn.SetDepNode(depNode); + Reservation *seRev = mad.FindReservation(insn); + depNode.SetInsn(insn); + depNode.SetType(kNodeTypeEmpty); + depNode.SetReservation(*seRev); + depNode.SetUnitNum(0); + depNode.ClearCfiInsns(); + depNode.SetUnits(nullptr); +} + +/* Combine adrpldr&clinit_tail to clinit. 
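+ * The pair is fused into a single MOP_clinit built from the first insn's two
+ * operands and keeping its insn id; the first DepNode adopts the new insn while
+ * the second is cleared to an empty pseudo node before their edges are merged.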
*/ +void AArch64DepAnalysis::CombineClinit(DepNode &firstNode, DepNode &secondNode, bool isAcrossSeparator) { + ASSERT(firstNode.GetInsn()->GetMachineOpcode() == MOP_adrp_ldr, "first insn should be adrpldr"); + ASSERT(secondNode.GetInsn()->GetMachineOpcode() == MOP_clinit_tail, "second insn should be clinit_tail"); + ASSERT(firstNode.GetCfiInsns().empty(), "There should not be any comment/cfi instructions between clinit."); + ASSERT(secondNode.GetComments().empty(), "There should not be any comment/cfi instructions between clinit."); + Insn &newInsn = cgFunc.GetCG()->BuildInstruction( + MOP_clinit, firstNode.GetInsn()->GetOperand(0), firstNode.GetInsn()->GetOperand(1)); + newInsn.SetId(firstNode.GetInsn()->GetId()); + /* Replace first node with new insn. */ + ReplaceDepNodeWithNewInsn(firstNode, secondNode, newInsn, true); + /* Clear second node information. */ + ClearDepNodeInfo(secondNode); + CombineDependence(firstNode, secondNode, isAcrossSeparator); +} + +/* + * Combine memory access pair: + * 1.ldr to ldp. + * 2.str to stp. + */ +void AArch64DepAnalysis::CombineMemoryAccessPair(DepNode &firstNode, DepNode &secondNode, bool useFirstOffset) { + ASSERT(firstNode.GetInsn(), "the insn of first Node should not be nullptr"); + ASSERT(secondNode.GetInsn(), "the insn of second Node should not be nullptr"); + MOperator thisMop = firstNode.GetInsn()->GetMachineOpcode(); + MOperator mopPair = GetMopPair(thisMop); + ASSERT(mopPair != 0, "mopPair should not be zero"); + Operand *opnd0 = nullptr; + Operand *opnd1 = nullptr; + Operand *opnd2 = nullptr; + if (useFirstOffset) { + opnd0 = &(firstNode.GetInsn()->GetOperand(0)); + opnd1 = &(secondNode.GetInsn()->GetOperand(0)); + opnd2 = &(firstNode.GetInsn()->GetOperand(1)); + } else { + opnd0 = &(secondNode.GetInsn()->GetOperand(0)); + opnd1 = &(firstNode.GetInsn()->GetOperand(0)); + opnd2 = &(secondNode.GetInsn()->GetOperand(1)); + } + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(mopPair, *opnd0, *opnd1, *opnd2); + newInsn.SetId(firstNode.GetInsn()->GetId()); + std::string newComment; + const MapleString &comment = firstNode.GetInsn()->GetComment(); + if (comment.c_str() != nullptr) { + newComment += comment.c_str(); + } + const MapleString &secondComment = secondNode.GetInsn()->GetComment(); + if (secondComment.c_str() != nullptr) { + newComment += " "; + newComment += secondComment.c_str(); + } + if ((newComment.c_str() != nullptr) && (strlen(newComment.c_str()) > 0)) { + newInsn.SetComment(newComment); + } + /* Replace first node with new insn. */ + ReplaceDepNodeWithNewInsn(firstNode, secondNode, newInsn, false); + /* Clear second node information. */ + ClearDepNodeInfo(secondNode); + CombineDependence(firstNode, secondNode, false, true); +} + +/* Combine two dependence nodes to one */ +void AArch64DepAnalysis::CombineDependence(DepNode &firstNode, DepNode &secondNode, bool isAcrossSeparator, + bool isMemCombine) { + if (isAcrossSeparator) { + /* Clear all latency of the second node. 
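+     * Across a separator only the latencies on the second node's edges are
+     * zeroed; otherwise its predecessors and successors are re-attached to the
+     * first node (a uniqueNodes set avoids duplicate edges, and for memory
+     * pairs each newly reached successor's valid-pred count is bumped).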
*/ + for (auto predLink : secondNode.GetPreds()) { + predLink->SetLatency(0); + } + for (auto succLink : secondNode.GetSuccs()) { + succLink->SetLatency(0); + } + return; + } + std::set uniqueNodes; + + for (auto predLink : firstNode.GetPreds()) { + if (predLink->GetDepType() == kDependenceTypeTrue) { + predLink->SetLatency(mad.GetLatency(*predLink->GetFrom().GetInsn(), *firstNode.GetInsn())); + } + (void)uniqueNodes.insert(&predLink->GetFrom()); + } + for (auto predLink : secondNode.GetPreds()) { + if (&predLink->GetFrom() != &firstNode) { + if (uniqueNodes.insert(&(predLink->GetFrom())).second) { + AddDependence(predLink->GetFrom(), firstNode, predLink->GetDepType()); + } + } + predLink->SetLatency(0); + } + uniqueNodes.clear(); + for (auto succLink : firstNode.GetSuccs()) { + if (succLink->GetDepType() == kDependenceTypeTrue) { + succLink->SetLatency(mad.GetLatency(*succLink->GetFrom().GetInsn(), *firstNode.GetInsn())); + } + (void)uniqueNodes.insert(&(succLink->GetTo())); + } + for (auto succLink : secondNode.GetSuccs()) { + if (uniqueNodes.insert(&(succLink->GetTo())).second) { + AddDependence(firstNode, succLink->GetTo(), succLink->GetDepType()); + if (isMemCombine) { + succLink->GetTo().IncreaseValidPredsSize(); + } + } + succLink->SetLatency(0); + } +} + +/* + * Build dependences of ambiguous instruction. + * ambiguous instruction : instructions that can not across may throw instructions. + */ +void AArch64DepAnalysis::BuildDepsAmbiInsn(Insn &insn) { + AddDependence4InsnInVectorByType(mayThrows, insn, kDependenceTypeThrow); + ambiInsns.emplace_back(&insn); +} + +/* Build dependences of may throw instructions. */ +void AArch64DepAnalysis::BuildDepsMayThrowInsn(Insn &insn) { + AddDependence4InsnInVectorByType(ambiInsns, insn, kDependenceTypeThrow); +} + +bool AArch64DepAnalysis::IsFrameReg(const RegOperand &opnd) const { + return (opnd.GetRegisterNumber() == RFP) || (opnd.GetRegisterNumber() == RSP); +} + +AArch64MemOperand *AArch64DepAnalysis::BuildNextMemOperandByByteSize(AArch64MemOperand &aarchMemOpnd, + uint32 byteSize) const { + AArch64MemOperand *nextMemOpnd = nullptr; + Operand *nextOpnd = aarchMemOpnd.Clone(memPool); + nextMemOpnd = static_cast(nextOpnd); + Operand *nextOfstOpnd = nextMemOpnd->GetOffsetImmediate()->Clone(memPool); + AArch64OfstOperand *aarchNextOfstOpnd = static_cast(nextOfstOpnd); + CHECK_NULL_FATAL(aarchNextOfstOpnd); + int32 offsetVal = aarchNextOfstOpnd->GetOffsetValue(); + aarchNextOfstOpnd->SetOffsetValue(offsetVal + byteSize); + nextMemOpnd->SetOffsetImmediate(*aarchNextOfstOpnd); + return nextMemOpnd; +} + +/* Get the second memory access operand of stp/ldp instructions. */ +AArch64MemOperand *AArch64DepAnalysis::GetNextMemOperand(Insn &insn, AArch64MemOperand &aarchMemOpnd) const { + AArch64MemOperand *nextMemOpnd = nullptr; + switch (insn.GetMachineOpcode()) { + case MOP_wldp: + case MOP_sldp: + case MOP_xldpsw: + case MOP_wstp: + case MOP_sstp: { + nextMemOpnd = BuildNextMemOperandByByteSize(aarchMemOpnd, k4ByteSize); + break; + } + case MOP_xldp: + case MOP_dldp: + case MOP_xstp: + case MOP_dstp: { + nextMemOpnd = BuildNextMemOperandByByteSize(aarchMemOpnd, k8ByteSize); + break; + } + default: + break; + } + + return nextMemOpnd; +} + +/* + * Build dependences of symbol memory access. + * Memory access with symbol must be a heap memory access. + */ +void AArch64DepAnalysis::BuildDepsAccessStImmMem(Insn &insn, bool isDest) { + if (isDest) { + /* + * Heap memory + * Build anti dependences. 
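+     * A symbol store takes anti edges from earlier heap loads and output edges
+     * from earlier heap stores before joining heapDefs; a symbol load (the else
+     * branch) only needs true edges from heapDefs. In both cases a pending
+     * memory barrier contributes a membar edge.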
+ */ + AddDependence4InsnInVectorByType(heapUses, insn, kDependenceTypeAnti); + /* Build output depnedence. */ + AddDependence4InsnInVectorByType(heapDefs, insn, kDependenceTypeOutput); + heapDefs.emplace_back(&insn); + } else { + /* Heap memory */ + AddDependence4InsnInVectorByType(heapDefs, insn, kDependenceTypeTrue); + heapUses.emplace_back(&insn); + } + if (memBarInsn != nullptr) { + AddDependence(*memBarInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeMembar); + } +} + +/* Build dependences of stack memory and heap memory uses. */ +void AArch64DepAnalysis::BuildDepsUseMem(Insn &insn, MemOperand &memOpnd) { + RegOperand *baseRegister = memOpnd.GetBaseRegister(); + AArch64MemOperand &aarchMemOpnd = static_cast(memOpnd); + AArch64MemOperand *nextMemOpnd = GetNextMemOperand(insn, aarchMemOpnd); + if (((baseRegister != nullptr) && IsFrameReg(*baseRegister)) || aarchMemOpnd.IsStackMem()) { + /* Stack memory address */ + for (auto defInsn : stackDefs) { + if (defInsn->IsCall() || NeedBuildDepsMem(aarchMemOpnd, nextMemOpnd, *defInsn)) { + AddDependence(*defInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeTrue); + continue; + } + } + stackUses.emplace_back(&insn); + } else { + /* Heap memory */ + AddDependence4InsnInVectorByType(heapDefs, insn, kDependenceTypeTrue); + heapUses.emplace_back(&insn); + } + if (memBarInsn != nullptr) { + AddDependence(*memBarInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeMembar); + } +} + +/* Return true if memInsn's memOpnd no alias with memOpnd and nextMemOpnd */ +bool AArch64DepAnalysis::NeedBuildDepsMem(const AArch64MemOperand &memOpnd, const AArch64MemOperand *nextMemOpnd, + Insn &memInsn) const { + auto *memOpndOfmemInsn = static_cast(memInsn.GetMemOpnd()); + if (!memOpnd.NoAlias(*memOpndOfmemInsn) || ((nextMemOpnd != nullptr) && !nextMemOpnd->NoAlias(*memOpndOfmemInsn))) { + return true; + } + AArch64MemOperand *nextMemOpndOfmemInsn = GetNextMemOperand(memInsn, *memOpndOfmemInsn); + if (nextMemOpndOfmemInsn != nullptr) { + if (!memOpnd.NoAlias(*nextMemOpndOfmemInsn) || + ((nextMemOpnd != nullptr) && !nextMemOpnd->NoAlias(*nextMemOpndOfmemInsn))) { + return true; + } + } + return false; +} + +/* + * Build anti dependences between insn and other insn that use stack memroy. + * insn : the instruction that defines stack memory. + * memOpnd : insn's memOpnd + * nextMemOpnd : some memory pair operator instruction (like ldp/stp) defines two memory. + */ +void AArch64DepAnalysis::BuildAntiDepsDefStackMem(Insn &insn, const AArch64MemOperand &memOpnd, + const AArch64MemOperand *nextMemOpnd) { + for (auto *useInsn : stackUses) { + if (NeedBuildDepsMem(memOpnd, nextMemOpnd, *useInsn)) { + AddDependence(*useInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeAnti); + } + } +} + +/* + * Build output dependences between insn with other insn that define stack memroy. + * insn : the instruction that defines stack memory. + * memOpnd : insn's memOpnd + * nextMemOpnd : some memory pair operator instruction (like ldp/stp) defines two memory. + */ +void AArch64DepAnalysis::BuildOutputDepsDefStackMem(Insn &insn, const AArch64MemOperand &memOpnd, + const AArch64MemOperand *nextMemOpnd) { + for (auto defInsn : stackDefs) { + if (defInsn->IsCall() || NeedBuildDepsMem(memOpnd, nextMemOpnd, *defInsn)) { + AddDependence(*defInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeOutput); + } + } +} + +/* Build dependences of stack memory and heap memory definitions. 
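+ * A store whose base register is the frame/stack pointer (or whose operand is
+ * marked as stack memory) gets alias-checked anti edges from stackUses and
+ * output edges from stackDefs, plus a control edge from the last call when the
+ * base is RSP (outgoing argument slots); everything else is treated as a heap
+ * definition. Either way the store is also ordered after earlier may-throw
+ * insns so it cannot be moved across them.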
*/ +void AArch64DepAnalysis::BuildDepsDefMem(Insn &insn, MemOperand &memOpnd) { + RegOperand *baseRegister = memOpnd.GetBaseRegister(); + AArch64MemOperand &aarchMemOpnd = static_cast(memOpnd); + AArch64MemOperand *nextMemOpnd = GetNextMemOperand(insn, aarchMemOpnd); + + if (((baseRegister != nullptr) && IsFrameReg(*baseRegister)) || aarchMemOpnd.IsStackMem()) { + /* Build anti dependences. */ + BuildAntiDepsDefStackMem(insn, aarchMemOpnd, nextMemOpnd); + /* Build output depnedence. */ + BuildOutputDepsDefStackMem(insn, aarchMemOpnd, nextMemOpnd); + if (lastCallInsn != nullptr) { + /* Build a dependence between stack passed arguments and call. */ + ASSERT(baseRegister != nullptr, "baseRegister shouldn't be null here"); + if (baseRegister->GetRegisterNumber() == RSP) { + AddDependence(*lastCallInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeControl); + } + } + stackDefs.emplace_back(&insn); + } else { + /* Heap memory + * Build anti dependences. + */ + AddDependence4InsnInVectorByType(heapUses, insn, kDependenceTypeAnti); + /* Build output depnedence. */ + AddDependence4InsnInVectorByType(heapDefs, insn, kDependenceTypeOutput); + heapDefs.emplace_back(&insn); + } + if (memBarInsn != nullptr) { + AddDependence(*memBarInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeMembar); + } + /* Memory definition can not across may-throw insns. */ + AddDependence4InsnInVectorByType(mayThrows, insn, kDependenceTypeThrow); +} + +/* Build dependences of memory barrior instructions. */ +void AArch64DepAnalysis::BuildDepsMemBar(Insn &insn) { + AddDependence4InsnInVectorByTypeAndCmp(stackUses, insn, kDependenceTypeMembar); + AddDependence4InsnInVectorByTypeAndCmp(heapUses, insn, kDependenceTypeMembar); + AddDependence4InsnInVectorByTypeAndCmp(stackDefs, insn, kDependenceTypeMembar); + AddDependence4InsnInVectorByTypeAndCmp(heapDefs, insn, kDependenceTypeMembar); + memBarInsn = &insn; +} + +/* A pseudo separator node depends all the other nodes. */ +void AArch64DepAnalysis::BuildDepsSeparator(DepNode &newSepNode, MapleVector &nodes) { + uint32 nextSepIndex = (separatorIndex + kMaxDependenceNum) < nodes.size() ? (separatorIndex + kMaxDependenceNum) + : static_cast(nodes.size() - 1); + newSepNode.ReservePreds(nextSepIndex - separatorIndex); + newSepNode.ReserveSuccs(nextSepIndex - separatorIndex); + for (uint32 i = separatorIndex; i < nextSepIndex; ++i) { + AddDependence(*nodes[i], newSepNode, kDependenceTypeSeparator); + } +} + + +/* Build control dependence for branch/ret instructions. */ +void AArch64DepAnalysis::BuildDepsControlAll(DepNode &depNode, const MapleVector &nodes) { + for (uint32 i = separatorIndex; i < depNode.GetIndex(); ++i) { + AddDependence(*nodes[i], depNode, kDependenceTypeControl); + } +} + +/* + * Build dependences of call instructions. + * Caller-saved physical registers will defined by a call instruction. + * Also a conditional register may modified by a call. + */ +void AArch64DepAnalysis::BuildCallerSavedDeps(Insn &insn) { + /* Build anti dependence and output dependence. */ + for (uint32 i = R0; i <= R7; ++i) { + BuildDepsDefReg(insn, i); + } + for (uint32 i = V0; i <= V7; ++i) { + BuildDepsDefReg(insn, i); + } + if (!beforeRA) { + for (uint32 i = R8; i <= R18; ++i) { + BuildDepsDefReg(insn, i); + } + for (uint32 i = R29; i <= RSP; ++i) { + BuildDepsUseReg(insn, i); + } + for (uint32 i = V16; i <= V31; ++i) { + BuildDepsDefReg(insn, i); + } + } + /* For condition operand, such as NE, EQ, and so on. 
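+   * A call may clobber the condition flags as well, so kRFLAG joins its
+   * definition set whenever the function keeps an rflag operand.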
*/ + if (cgFunc.GetRflag() != nullptr) { + BuildDepsDefReg(insn, kRFLAG); + } +} + +/* + * Build dependence between control register and last call instruction. + * insn : instruction that with control register operand. + * isDest : if the control register operand is a destination operand. + */ +void AArch64DepAnalysis::BuildDepsBetweenControlRegAndCall(Insn &insn, bool isDest) { + if (lastCallInsn == nullptr) { + return; + } + if (isDest) { + AddDependence(*lastCallInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeOutput); + return; + } + AddDependence(*lastCallInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeAnti); +} + +/* + * Build dependence between stack-define-instruction that deal with call-insn's args and a call-instruction. + * insn : a call instruction (call/tail-call) + */ +void AArch64DepAnalysis::BuildStackPassArgsDeps(Insn &insn) { + for (auto stackDefInsn : stackDefs) { + if (stackDefInsn->IsCall()) { + continue; + } + Operand *opnd = stackDefInsn->GetMemOpnd(); + ASSERT(opnd->IsMemoryAccessOperand(), "make sure opnd is memOpnd"); + MemOperand *memOpnd = static_cast(opnd); + RegOperand *baseReg = memOpnd->GetBaseRegister(); + if ((baseReg != nullptr) && (baseReg->GetRegisterNumber() == RSP)) { + AddDependence(*stackDefInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeControl); + } + } +} + +/* Some insns may dirty all stack memory, such as "bl MCC_InitializeLocalStackRef". */ +void AArch64DepAnalysis::BuildDepsDirtyStack(Insn &insn) { + /* Build anti dependences. */ + AddDependence4InsnInVectorByType(stackUses, insn, kDependenceTypeAnti); + /* Build output depnedence. */ + AddDependence4InsnInVectorByType(stackDefs, insn, kDependenceTypeOutput); + stackDefs.emplace_back(&insn); +} + +/* Some call insns may use all stack memory, such as "bl MCC_CleanupLocalStackRef_NaiveRCFast". */ +void AArch64DepAnalysis::BuildDepsUseStack(Insn &insn) { + /* Build true dependences. */ + AddDependence4InsnInVectorByType(stackDefs, insn, kDependenceTypeTrue); +} + +/* Some insns may dirty all heap memory, such as a call insn. */ +void AArch64DepAnalysis::BuildDepsDirtyHeap(Insn &insn) { + /* Build anti dependences. */ + AddDependence4InsnInVectorByType(heapUses, insn, kDependenceTypeAnti); + /* Build output depnedence. */ + AddDependence4InsnInVectorByType(heapDefs, insn, kDependenceTypeOutput); + if (memBarInsn != nullptr) { + AddDependence(*memBarInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeMembar); + } + heapDefs.emplace_back(&insn); +} + +/* Build a pseudo node to seperate dependence graph. */ +DepNode *AArch64DepAnalysis::BuildSeparatorNode() { + Insn &pseudoSepInsn = cgFunc.GetCG()->BuildInstruction(MOP_pseudo_dependence_seperator); + DepNode *separatorNode = memPool.New(pseudoSepInsn, alloc); + separatorNode->SetType(kNodeTypeSeparator); + pseudoSepInsn.SetDepNode(*separatorNode); + if (beforeRA) { + RegPressure *regPressure = memPool.New(alloc); + separatorNode->SetRegPressure(*regPressure); + separatorNode->InitPressure(); + } + return separatorNode; +} + +/* Init depAnalysis data struction */ +void AArch64DepAnalysis::Init(BB &bb, MapleVector &nodes) { + curBB = &bb; + ClearAllDepData(); + lastComments.clear(); + /* Analysis live-in registers in catch BB. */ + AnalysisAmbiInsns(bb); + /* Clear all dependence nodes and push the first separator node. 
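+   * The separator becomes node 0 of this block, and before RA every live-in
+   * register is treated as defined by it, so any use inside the block can
+   * always find a defining node.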
*/ + nodes.clear(); + DepNode *pseudoSepNode = BuildSeparatorNode(); + nodes.emplace_back(pseudoSepNode); + separatorIndex = 0; + + if (beforeRA) { + /* assump first pseudo_dependence_seperator insn of current bb define live-in's registers */ + Insn *pseudoSepInsn = pseudoSepNode->GetInsn(); + for (auto ®NO : bb.GetLiveInRegNO()) { + regDefs[regNO] = pseudoSepInsn; + pseudoSepNode->AddDefReg(regNO); + pseudoSepNode->SetRegDefs(pseudoSepNode->GetDefRegnos().size(), nullptr); + } + } +} + +/* When a separator build, it is the same as a new basic block. */ +void AArch64DepAnalysis::ClearAllDepData() { + uint32 maxRegNum; + if (beforeRA) { + maxRegNum = cgFunc.GetMaxVReg(); + } else { + maxRegNum = kAllRegNum; + } + errno_t ret = memset_s(regDefs, sizeof(Insn*) * maxRegNum, 0, sizeof(Insn*) * maxRegNum); + CHECK_FATAL(ret == EOK, "call memset_s failed in Unit"); + ret = memset_s(regUses, sizeof(RegList*) * maxRegNum, 0, sizeof(RegList*) * maxRegNum); + CHECK_FATAL(ret == EOK, "call memset_s failed in Unit"); + memBarInsn = nullptr; + lastCallInsn = nullptr; + lastFrameDef = nullptr; + + stackUses.clear(); + stackDefs.clear(); + heapUses.clear(); + heapDefs.clear(); + mayThrows.clear(); + ambiInsns.clear(); +} + +/* Analysis live-in registers in catch bb and cleanup bb. */ +void AArch64DepAnalysis::AnalysisAmbiInsns(BB &bb) { + hasAmbiRegs = false; + if (bb.GetEhSuccs().empty()) { + return; + } + + /* Union all catch bb */ + for (auto succBB : bb.GetEhSuccs()) { + const MapleSet &liveInRegSet = succBB->GetLiveInRegNO(); + set_union(liveInRegSet.begin(), liveInRegSet.end(), + ehInRegs.begin(), ehInRegs.end(), + inserter(ehInRegs, ehInRegs.begin())); + } + + /* Union cleanup entry bb. */ + const MapleSet ®NOSet = cgFunc.GetCleanupEntryBB()->GetLiveInRegNO(); + std::set_union(regNOSet.begin(), regNOSet.end(), + ehInRegs.begin(), ehInRegs.end(), + inserter(ehInRegs, ehInRegs.begin())); + + /* Subtract R0 and R1, that is defined by eh runtime. */ + ehInRegs.erase(R0); + ehInRegs.erase(R1); + if (ehInRegs.empty()) { + return; + } + hasAmbiRegs = true; +} + +/* Check if regNO is in ehInRegs. */ +bool AArch64DepAnalysis::IfInAmbiRegs(regno_t regNO) const { + if (!hasAmbiRegs) { + return false; + } + if (ehInRegs.find(regNO) != ehInRegs.end()) { + return true; + } + return false; +} + +/* + * Build dependences of memory operand. + * insn : a instruction with the memory access operand. + * opnd : the memory access operand. + * regProp : operand property of the memory access operandess operand. + */ +void AArch64DepAnalysis::BuildMemOpndDependency(Insn &insn, Operand &opnd, const AArch64OpndProp ®Prop) { + ASSERT(opnd.IsMemoryAccessOperand(), "opnd must be memory Operand"); + AArch64MemOperand *memOpnd = static_cast(&opnd); + RegOperand *baseRegister = memOpnd->GetBaseRegister(); + if (baseRegister != nullptr) { + regno_t regNO = baseRegister->GetRegisterNumber(); + BuildDepsUseReg(insn, regNO); + if ((memOpnd->GetAddrMode() == AArch64MemOperand::kAddrModeBOi) && + (memOpnd->IsPostIndexed() || memOpnd->IsPreIndexed())) { + /* Base operand has changed. 
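+       * Pre/post-indexed addressing writes the updated address back into the
+       * base register (e.g. a post-indexed load both reads and redefines its
+       * base), so the base is recorded as a definition in addition to the use.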
*/ + BuildDepsDefReg(insn, regNO); + } + } + RegOperand *indexRegister = memOpnd->GetIndexRegister(); + if (indexRegister != nullptr) { + regno_t regNO = indexRegister->GetRegisterNumber(); + BuildDepsUseReg(insn, regNO); + } + if (regProp.IsUse()) { + BuildDepsUseMem(insn, *memOpnd); + } else { + BuildDepsDefMem(insn, *memOpnd); + BuildDepsAmbiInsn(insn); + } + if (insn.IsYieldPoint()) { + BuildDepsMemBar(insn); + BuildDepsDefReg(insn, kRFLAG); + } +} + +/* Build Dependency for each Operand of insn */ +void AArch64DepAnalysis::BuildOpndDependency(Insn &insn) { + const AArch64MD* md = &AArch64CG::kMd[static_cast(&insn)->GetMachineOpcode()]; + MOperator mOp = insn.GetMachineOpcode(); + uint32 opndNum = insn.GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn.GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + if (opnd.IsMemoryAccessOperand()) { + BuildMemOpndDependency(insn, opnd, *regProp); + } else if (opnd.IsStImmediate()) { + if (mOp != MOP_xadrpl12) { + BuildDepsAccessStImmMem(insn, false); + } + } else if (opnd.IsRegister()) { + RegOperand ®Opnd = static_cast(opnd); + regno_t regNO = regOpnd.GetRegisterNumber(); + + if (regProp->IsUse()) { + BuildDepsUseReg(insn, regNO); + } + + if (regProp->IsDef()) { + BuildDepsDefReg(insn, regNO); + } + } else if (opnd.IsConditionCode()) { + /* For condition operand, such as NE, EQ, and so on. */ + if (regProp->IsUse()) { + BuildDepsUseReg(insn, kRFLAG); + BuildDepsBetweenControlRegAndCall(insn, false); + } + + if (regProp->IsDef()) { + BuildDepsDefReg(insn, kRFLAG); + BuildDepsBetweenControlRegAndCall(insn, true); + } + } else if (opnd.IsList()) { + ListOperand &listOpnd = static_cast(opnd); + /* Build true dependences */ + for (auto lst : listOpnd.GetOperands()) { + regno_t regNO = lst->GetRegisterNumber(); + BuildDepsUseReg(insn, regNO); + } + } + } +} + +/* + * Build dependences in some special issue (stack/heap/throw/clinit/lazy binding/control flow). + * insn : a instruction. + * depNode : insn's depNode. + * nodes : the dependence nodes inclue insn's depNode. + */ +void AArch64DepAnalysis::BuildSpecialInsnDependency(Insn &insn, DepNode &depNode, const MapleVector &nodes) { + const AArch64MD *md = &AArch64CG::kMd[static_cast(&insn)->GetMachineOpcode()]; + MOperator mOp = insn.GetMachineOpcode(); + if (insn.IsCall() || insn.IsTailCall()) { + /* Caller saved registers. */ + BuildCallerSavedDeps(insn); + BuildStackPassArgsDeps(insn); + + if (mOp == MOP_xbl) { + FuncNameOperand &target = static_cast(insn.GetOperand(0)); + if ((target.GetName() == "MCC_InitializeLocalStackRef") || + (target.GetName() == "MCC_ClearLocalStackRef") || + (target.GetName() == "MCC_DecRefResetPair")) { + /* Write stack memory. */ + BuildDepsDirtyStack(insn); + } else if ((target.GetName() == "MCC_CleanupLocalStackRef_NaiveRCFast") || + (target.GetName() == "MCC_CleanupLocalStackRefSkip_NaiveRCFast") || + (target.GetName() == "MCC_CleanupLocalStackRefSkip")) { + /* UseStackMemory. 
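+         * These cleanup helpers only read local stack slots, so true edges from
+         * prior stack definitions are enough, whereas the initialize/clear
+         * helpers above are modelled as dirtying the whole stack.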
*/ + BuildDepsUseStack(insn); + } + } + BuildDepsDirtyHeap(insn); + BuildDepsAmbiInsn(insn); + if (lastCallInsn != nullptr) { + AddDependence(*lastCallInsn->GetDepNode(), *insn.GetDepNode(), kDependenceTypeControl); + } + lastCallInsn = &insn; + } else if (insn.IsClinit() || insn.IsLazyLoad() || insn.IsArrayClassCache()) { + BuildDepsDirtyHeap(insn); + BuildDepsDefReg(insn, kRFLAG); + if (!insn.IsAdrpLdr()) { + BuildDepsDefReg(insn, R16); + BuildDepsDefReg(insn, R17); + } + } else if ((mOp == MOP_xret) || md->IsBranch()) { + BuildDepsControlAll(depNode, nodes); + } else if (insn.IsMemAccessBar()) { + BuildDepsMemBar(insn); + } else if (insn.IsSpecialIntrinsic()) { + BuildDepsDirtyHeap(insn); + } +} + +/* + * If the instruction's number of current basic block more than kMaxDependenceNum, + * then insert some pseudo separator node to split baic block. + */ +void AArch64DepAnalysis::SeperateDependenceGraph(MapleVector &nodes, uint32 &nodeSum) { + if ((nodeSum > 0) && ((nodeSum % kMaxDependenceNum) == 0)) { + ASSERT(nodeSum == nodes.size(), "CG internal error, nodeSum should equal to nodes.size."); + /* Add a pseudo node to seperate dependence graph. */ + DepNode *separatorNode = BuildSeparatorNode(); + separatorNode->SetIndex(nodeSum); + nodes.emplace_back(separatorNode); + BuildDepsSeparator(*separatorNode, nodes); + + if (beforeRA) { + /* for all live-out register of current bb */ + for (auto ®NO : curBB->GetLiveOutRegNO()) { + if (regDefs[regNO] != nullptr) { + AppendRegUseList(*(separatorNode->GetInsn()), regNO); + separatorNode->AddUseReg(regNO); + separatorNode->SetRegUses(*regUses[regNO]); + } + } + } + ClearAllDepData(); + separatorIndex = nodeSum++; + } +} + +/* + * Generate a depNode, + * insn : create depNode for the instruction. + * nodes : a vector to store depNode. + * nodeSum : the new depNode's index. + * comments : those comment insn between last no-comment's insn and insn. 
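+ * The node is built from the insn's reservation in the machine description,
+ * gets small preallocated pred/succ vectors, carries a RegPressure record when
+ * run before RA, and adopts any pending comment insns.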
+ */ +DepNode *AArch64DepAnalysis::GenerateDepNode(Insn &insn, MapleVector &nodes, + int32 nodeSum, const MapleVector &comments) { + DepNode *depNode = nullptr; + Reservation *rev = mad.FindReservation(insn); + ASSERT(rev != nullptr, "rev is nullptr"); + depNode = memPool.New(insn, alloc, rev->GetUnit(), rev->GetUnitNum(), *rev); + if (beforeRA) { + RegPressure *regPressure = memPool.New(alloc); + depNode->SetRegPressure(*regPressure); + depNode->InitPressure(); + } + depNode->SetIndex(nodeSum); + nodes.emplace_back(depNode); + insn.SetDepNode(*depNode); + + constexpr size_t vectorSize = 5; + depNode->ReservePreds(vectorSize); + depNode->ReserveSuccs(vectorSize); + + if (!comments.empty()) { + depNode->SetComments(comments); + } + return depNode; +} + +void AArch64DepAnalysis::BuildAmbiInsnDependency(Insn &insn) { + const auto &defRegnos = insn.GetDepNode()->GetDefRegnos(); + for (const auto ®NO : defRegnos) { + if (IfInAmbiRegs(regNO)) { + BuildDepsAmbiInsn(insn); + break; + } + } +} + +void AArch64DepAnalysis::BuildMayThrowInsnDependency(Insn &insn) { + /* build dependency for maythrow insn; */ + if (insn.MayThrow()) { + BuildDepsMayThrowInsn(insn); + if (lastFrameDef != nullptr) { + AddDependence(*lastFrameDef->GetDepNode(), *insn.GetDepNode(), kDependenceTypeThrow); + } + } +} + +void AArch64DepAnalysis::UpdateRegUseAndDef(Insn &insn, DepNode &depNode, MapleVector &nodes) { + const auto &useRegnos = depNode.GetUseRegnos(); + if (beforeRA) { + depNode.InitRegUsesSize(useRegnos.size()); + } + for (auto regNO : useRegnos) { + AppendRegUseList(insn, regNO); + if (beforeRA) { + depNode.SetRegUses(*regUses[regNO]); + if (regDefs[regNO] == nullptr) { + regDefs[regNO] = nodes[separatorIndex]->GetInsn(); + nodes[separatorIndex]->AddDefReg(regNO); + nodes[separatorIndex]->SetRegDefs(nodes[separatorIndex]->GetDefRegnos().size(), regUses[regNO]); + } + } + } + + const auto &defRegnos = depNode.GetDefRegnos(); + size_t i = 0; + if (beforeRA) { + depNode.InitRegDefsSize(defRegnos.size()); + } + for (const auto regNO : defRegnos) { + regDefs[regNO] = &insn; + regUses[regNO] = nullptr; + if (beforeRA) { + depNode.SetRegDefs(i, nullptr); + if (regNO >= R0 && regNO <= R3) { + depNode.SetHasPreg(true); + } else if (regNO == R8) { + depNode.SetHasNativeCallRegister(true); + } + } + ++i; + } +} + +/* Update stack and heap dependency */ +void AArch64DepAnalysis::UpdateStackAndHeapDependency(DepNode &depNode, Insn &insn, const Insn &locInsn) { + if (!insn.MayThrow()) { + return; + } + depNode.SetLocInsn(locInsn); + mayThrows.emplace_back(&insn); + AddDependence4InsnInVectorByType(stackDefs, insn, kDependenceTypeThrow); + AddDependence4InsnInVectorByType(heapDefs, insn, kDependenceTypeThrow); +} + +/* Add a separatorNode to the end of a nodes + * * before RA: add all live-out registers to this separatorNode'Uses + * */ +void AArch64DepAnalysis::AddEndSeparatorNode(MapleVector &nodes) { + DepNode *separatorNode = BuildSeparatorNode(); + nodes.emplace_back(separatorNode); + BuildDepsSeparator(*separatorNode, nodes); + + if (beforeRA) { + /* for all live-out register of current bb */ + for (auto ®NO : curBB->GetLiveOutRegNO()) { + if (regDefs[regNO] != nullptr) { + AppendRegUseList(*(separatorNode->GetInsn()), regNO); + separatorNode->AddUseReg(regNO); + separatorNode->SetRegUses(*regUses[regNO]); + } + } + } +} + +/* + * Build dependence graph. + * 1: Build dependence nodes. + * 2: Build edges between dependence nodes. 
Edges are: + * 2.1) True dependences + * 2.2) Anti dependences + * 2.3) Output dependences + * 2.4) Barrier dependences + */ +void AArch64DepAnalysis::Run(BB &bb, MapleVector &nodes) { + /* Initial internal datas. */ + Init(bb, nodes); + uint32 nodeSum = 1; + MapleVector comments(alloc.Adapter()); + const Insn *locInsn = bb.GetFirstLoc(); + FOR_BB_INSNS(insn, (&bb)) { + if (!insn->IsMachineInstruction()) { + if (insn->IsImmaterialInsn()) { + if (!insn->IsComment()) { + locInsn = insn; + } else { + comments.emplace_back(insn); + } + } else if (insn->IsCfiInsn()) { + if (!nodes.empty()) { + nodes.back()->AddCfiInsn(*insn); + } + } + continue; + } + /* Add a pseudo node to seperate dependence graph when appropriate */ + SeperateDependenceGraph(nodes, nodeSum); + /* generate a DepNode */ + DepNode *depNode = GenerateDepNode(*insn, nodes, nodeSum, comments); + ++nodeSum; + comments.clear(); + /* Build Dependency for maythrow insn; */ + BuildMayThrowInsnDependency(*insn); + /* Build Dependency for each Operand of insn */ + BuildOpndDependency(*insn); + /* Build Dependency for special insn */ + BuildSpecialInsnDependency(*insn, *depNode, nodes); + /* Build Dependency for AmbiInsn if needed */ + BuildAmbiInsnDependency(*insn); + /* Update stack and heap dependency */ + UpdateStackAndHeapDependency(*depNode, *insn, *locInsn); + if (insn->IsFrameDef()) { + lastFrameDef = insn; + } + /* Seperator exists. */ + AddDependence(*nodes[separatorIndex], *insn->GetDepNode(), kDependenceTypeSeparator); + /* Update register use and register def */ + UpdateRegUseAndDef(*insn, *depNode, nodes); + } + + AddEndSeparatorNode(nodes); + + if (!comments.empty()) { + lastComments = comments; + } + comments.clear(); +} + +/* return dependence type name */ +const std::string &AArch64DepAnalysis::GetDepTypeName(DepType depType) const { + ASSERT(depType <= kDependenceTypeNone, "array boundary check failed"); + return kDepTypeName[depType]; +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_ebo.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_ebo.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9ee7dbe2d41a4220af154d8b5f9cda777d3825cf --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_ebo.cpp @@ -0,0 +1,974 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#include "riscv64_ebo.h" +#include "riscv64_cg.h" +#include "mpl_logging.h" +namespace maplebe { +using namespace maple; +#define EBO_DUMP CG_DEBUG_FUNC(cgFunc) + +bool AArch64Ebo::IsFmov(const Insn &insn) const { + return ((MOP_xvmovsr <= insn.GetMachineOpcode()) && (insn.GetMachineOpcode() <= MOP_xvmovrd)); +} + +bool AArch64Ebo::IsAdd(const Insn &insn) const { + return ((MOP_xaddrrr <= insn.GetMachineOpcode()) && (insn.GetMachineOpcode() <= MOP_ssub)); +} + +bool AArch64Ebo::IsZeroRegister(const Operand &opnd) const { + if (!opnd.IsRegister()) { + return false; + } + const AArch64RegOperand *regOpnd = static_cast(&opnd); + return regOpnd->IsZeroRegister(); +} + +bool AArch64Ebo::IsClinitCheck(const Insn &insn) const { + MOperator mOp = insn.GetMachineOpcode(); + return ((mOp == MOP_clinit) || (mOp == MOP_clinit_tail)); +} + +/* retrun true if insn is globalneeded */ +bool AArch64Ebo::IsGlobalNeeded(Insn &insn) const { + /* Calls may have side effects. */ + if (insn.IsCall()) { + return true; + } + + /* Intrinsic call should not be removed. */ + if (insn.IsSpecialIntrinsic()) { + return true; + } + + /* Clinit should not be removed. */ + if (insn.IsFixedInsn()) { + return true; + } + + /* Yieldpoints should not be removed by optimizer. */ + if (cgFunc->GetCG()->GenYieldPoint() && insn.IsYieldPoint()) { + return true; + } + + Operand *opnd = insn.GetResult(0); + if ((opnd != nullptr) && (opnd->IsConstReg() || (opnd->IsRegister() && static_cast(opnd)->IsSPOrFP()))) { + return true; + } + return false; +} + +/* in aarch64,resOp will not be def and use in the same time */ +bool AArch64Ebo::ResIsNotDefAndUse(Insn &insn) const { + (void)insn; + return true; +} + +/* Return true if opnd live out of bb. */ +bool AArch64Ebo::LiveOutOfBB(const Operand &opnd, const BB &bb) const { + CHECK_FATAL(opnd.IsRegister(), "expect register here."); + /* when optimize_level < 2, there is need to anlyze live range. */ + if (live == nullptr) { + return false; + } + bool isLiveOut = false; + if (bb.GetLiveOut()->TestBit(static_cast(&opnd)->GetRegisterNumber())) { + isLiveOut = true; + } + return isLiveOut; +} + +bool AArch64Ebo::IsLastAndBranch(BB &bb, Insn &insn) const { + return (bb.GetLastInsn() == &insn) && insn.IsBranch(); +} + +const RegOperand &AArch64Ebo::GetRegOperand(const Operand &opnd) const { + CHECK_FATAL(opnd.IsRegister(), "aarch64 shoud not have regShiftOp! opnd is not register!"); + const auto &res = static_cast(opnd); + return res; +} + +/* Create infomation for local_opnd from its def insn current_insn. */ +OpndInfo *AArch64Ebo::OperandInfoDef(BB ¤tBB, Insn ¤tInsn, Operand &localOpnd) { + int32 hashVal = localOpnd.IsRegister() ? 
-1 : ComputeOpndHash(localOpnd); + OpndInfo *opndInfoPrev = GetOpndInfo(localOpnd, hashVal); + OpndInfo *opndInfo = GetNewOpndInfo(currentBB, ¤tInsn, localOpnd, hashVal); + if (localOpnd.IsMemoryAccessOperand()) { + MemOpndInfo *memInfo = static_cast(opndInfo); + MemOperand *mem = static_cast(&localOpnd); + Operand *base = mem->GetBaseRegister(); + Operand *offset = mem->GetOffset(); + if (base != nullptr && base->IsRegister()) { + memInfo->SetBaseInfo(*OperandInfoUse(currentBB, *base)); + } + if (offset != nullptr && offset->IsRegister()) { + memInfo->SetOffsetInfo(*OperandInfoUse(currentBB, *offset)); + } + } + opndInfo->same = opndInfoPrev; + if ((opndInfoPrev != nullptr)) { + opndInfoPrev->redefined = true; + if (opndInfoPrev->bb == ¤tBB) { + opndInfoPrev->redefinedInBB = true; + } + UpdateOpndInfo(localOpnd, *opndInfoPrev, opndInfo, hashVal); + } else { + SetOpndInfo(localOpnd, opndInfo, hashVal); + } + return opndInfo; +} + +void AArch64Ebo::DefineClinitSpecialRegisters(InsnInfo &insnInfo) { + Insn *insn = insnInfo.insn; + CHECK_FATAL(insn != nullptr, "nullptr of currInsnInfo"); + RegOperand &phyOpnd1 = a64CGFunc->GetOrCreatePhysicalRegisterOperand(R16, k64BitSize, kRegTyInt); + OpndInfo *opndInfo = OperandInfoDef(*insn->GetBB(), *insn, phyOpnd1); + opndInfo->insnInfo = &insnInfo; + + RegOperand &phyOpnd2 = a64CGFunc->GetOrCreatePhysicalRegisterOperand(R17, k64BitSize, kRegTyInt); + opndInfo = OperandInfoDef(*insn->GetBB(), *insn, phyOpnd2); + opndInfo->insnInfo = &insnInfo; +} + +void AArch64Ebo::BuildCallerSaveRegisters() { + callerSaveRegTable.clear(); + RegOperand &phyOpndR0 = a64CGFunc->GetOrCreatePhysicalRegisterOperand(R0, k64BitSize, kRegTyInt); + RegOperand &phyOpndV0 = a64CGFunc->GetOrCreatePhysicalRegisterOperand(V0, k64BitSize, kRegTyFloat); + callerSaveRegTable.emplace_back(&phyOpndR0); + callerSaveRegTable.emplace_back(&phyOpndV0); + for (uint32 i = R1; i <= R18; i++) { + RegOperand &phyOpnd = + a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyInt); + callerSaveRegTable.emplace_back(&phyOpnd); + } + for (uint32 i = V1; i <= V7; i++) { + RegOperand &phyOpnd = + a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyFloat); + callerSaveRegTable.emplace_back(&phyOpnd); + } + for (uint32 i = V16; i <= V31; i++) { + RegOperand &phyOpnd = + a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyFloat); + callerSaveRegTable.emplace_back(&phyOpnd); + } + CHECK_FATAL(callerSaveRegTable.size() < kMaxCallerSaveReg, + "number of elements in callerSaveRegTable must less then 45!"); +} + +void AArch64Ebo::DefineCallerSaveRegisters(InsnInfo &insnInfo) { + Insn *insn = insnInfo.insn; + ASSERT(insn->IsCall(), "insn should be a call insn."); + for (auto opnd : callerSaveRegTable) { + OpndInfo *opndInfo = OperandInfoDef(*insn->GetBB(), *insn, *opnd); + opndInfo->insnInfo = &insnInfo; + } +} + +void AArch64Ebo::DefineReturnUseRegister(Insn &insn) { + /* Define scalar callee save register and FP, LR. */ + for (uint32 i = R19; i <= R30; i++) { + RegOperand &phyOpnd = + a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyInt); + OperandInfoUse(*insn.GetBB(), phyOpnd); + } + + /* Define SP */ + RegOperand &phyOpndSP = + a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(RSP), k64BitSize, kRegTyInt); + OperandInfoUse(*insn.GetBB(), phyOpndSP); + + /* Define FP callee save registers. 
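+ * (v8-v15: under the AAPCS64 the callee needs to preserve only the low 64 bits
+ *  of these registers)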
*/ + for (uint32 i = V8; i <= V15; i++) { + RegOperand &phyOpnd = + a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyFloat); + OperandInfoUse(*insn.GetBB(), phyOpnd); + } +} + +void AArch64Ebo::DefineCallUseSpecialRegister(Insn &insn) { + /* Define FP, LR. */ + for (uint32 i = R29; i <= R30; i++) { + RegOperand &phyOpnd = + a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyInt); + OperandInfoUse(*insn.GetBB(), phyOpnd); + } + + /* Define SP */ + RegOperand &phyOpndSP = + a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(RSP), k64BitSize, kRegTyInt); + OperandInfoUse(*insn.GetBB(), phyOpndSP); +} + +/* return true if op1 == op2 */ +bool AArch64Ebo::OperandEqSpecial(const Operand &op1, const Operand &op2) const { + switch (op1.GetKind()) { + case Operand::kOpdRegister: { + const AArch64RegOperand ®1 = static_cast(op1); + const AArch64RegOperand ®2 = static_cast(op2); + return reg1 == reg2; + } + case Operand::kOpdImmediate: { + const ImmOperand &imm1 = static_cast(op1); + const ImmOperand &imm2 = static_cast(op2); + return imm1 == imm2; + } + case Operand::kOpdOffset: { + const AArch64OfstOperand &ofst1 = static_cast(op1); + const AArch64OfstOperand &ofst2 = static_cast(op2); + return ofst1 == ofst2; + } + case Operand::kOpdStImmediate: { + const StImmOperand &stImm1 = static_cast(op1); + const StImmOperand &stImm2 = static_cast(op2); + return stImm1 == stImm2; + } + case Operand::kOpdMem: { + const AArch64MemOperand &mem1 = static_cast(op1); + const AArch64MemOperand &mem2 = static_cast(op2); + if (mem1.GetAddrMode() == mem2.GetAddrMode()) { + ASSERT(mem1.GetBaseRegister() != nullptr, "nullptr check"); + ASSERT(mem2.GetBaseRegister() != nullptr, "nullptr check"); + } + return ((mem1.GetAddrMode() == mem2.GetAddrMode()) && + OperandEqual(*(mem1.GetBaseRegister()), *(mem2.GetBaseRegister())) && + OperandEqual(*(mem1.GetIndexRegister()), *(mem2.GetIndexRegister())) && + OperandEqual(*(mem1.GetOffsetOperand()), *(mem2.GetOffsetOperand())) && + (mem1.GetSymbol() == mem2.GetSymbol()) && (mem1.GetSize() == mem2.GetSize())); + } + default: { + return false; + } + } +} + +int32 AArch64Ebo::GetOffsetVal(const MemOperand &mem) const { + const AArch64MemOperand &memOpnd = static_cast(mem); + AArch64OfstOperand *offset = memOpnd.GetOffsetImmediate(); + int32 val = 0; + if (offset != nullptr) { + val += offset->GetOffsetValue(); + + if (offset->IsSymOffset() || offset->IsSymAndImmOffset()) { + val += offset->GetSymbol()->GetStIdx().Idx(); + } + } + return val; +} + +/* + * move vreg1, #1 + * move vreg2, vreg1 + * ===> + * move vreg1, #1 + * move vreg2, #1 + * return true if do simplify successfully. 
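+ * A zero constant is additionally propagated as the zero register for moves and
+ * stores, e.g. (illustrative):
+ *   move vreg1, #0
+ *   str  vreg1, [sp, #8]
+ *   ===>
+ *   str  wzr, [sp, #8]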
+ */ +bool AArch64Ebo::DoConstProp(Insn &insn, uint32 idx, Operand &opnd) { + AArch64ImmOperand *src = static_cast(&opnd); + const AArch64MD *md = &AArch64CG::kMd[(insn.GetMachineOpcode())]; + /* avoid the invalid case "cmp wzr, #0"/"add w1, wzr, #100" */ + if (src->IsZero() && insn.GetOperand(idx).IsRegister() && (insn.IsStore() || insn.IsMove() || md->IsCondDef())) { + insn.SetOperand(idx, *GetZeroOpnd(src->GetSize())); + return true; + } + MOperator mopCode = insn.GetMachineOpcode(); + switch (mopCode) { + case MOP_xmovrr: + case MOP_wmovrr: { + ASSERT(idx == kInsnSecondOpnd, "src const for move must be the second operand."); + uint32 targetSize = insn.GetOperand(idx).GetSize(); + if (src->GetSize() != targetSize) { + src = static_cast(src->Clone(*cgFunc->GetMemoryPool())); + CHECK_FATAL(src != nullptr, "pointer result is null"); + src->SetSize(targetSize); + } + if (src->IsSingleInstructionMovable()) { + if (EBO_DUMP) { + LogInfo::MapleLogger() << " Do constprop:Prop constval " << src->GetValue() << "into insn:\n"; + insn.Dump(); + } + insn.SetOperand(kInsnSecondOpnd, *src); + MOperator mOp = (mopCode == MOP_wmovrr) ? MOP_xmovri32 : MOP_xmovri64; + insn.SetMOperator(mOp); + if (EBO_DUMP) { + LogInfo::MapleLogger() << " after constprop the insn is:\n"; + insn.Dump(); + } + return true; + } + break; + } + case MOP_xaddrrr: + case MOP_waddrrr: + case MOP_xsubrrr: + case MOP_wsubrrr: { + if ((idx != kInsnThirdOpnd) || !src->IsInBitSize(kMaxImmVal24Bits, 0) || + !(src->IsInBitSize(kMaxImmVal12Bits, 0) || src->IsInBitSize(kMaxImmVal12Bits, kMaxImmVal12Bits))) { + return false; + } + Operand &result = insn.GetOperand(0); + bool is64Bits = (result.GetSize() == k64BitSize); + if (EBO_DUMP) { + LogInfo::MapleLogger() << " Do constprop:Prop constval " << src->GetValue() << "into insn:\n"; + insn.Dump(); + } + if (src->IsZero()) { + MOperator mOp = is64Bits ? MOP_xmovrr : MOP_wmovrr; + insn.SetMOP(mOp); + insn.PopBackOperand(); + if (EBO_DUMP) { + LogInfo::MapleLogger() << " after constprop the insn is:\n"; + insn.Dump(); + } + return true; + } + insn.SetOperand(kInsnThirdOpnd, *src); + if ((mopCode == MOP_xaddrrr) || (mopCode == MOP_waddrrr)) { + is64Bits ? insn.SetMOperator(MOP_xaddrri12) : insn.SetMOperator(MOP_waddrri12); + } else if ((mopCode == MOP_xsubrrr) || (mopCode == MOP_wsubrrr)) { + is64Bits ? 
insn.SetMOperator(MOP_xsubrri12) : insn.SetMOperator(MOP_wsubrri12); + } + if (EBO_DUMP) { + LogInfo::MapleLogger() << " after constprop the insn is:\n"; + insn.Dump(); + } + return true; + } + default: + break; + } + return false; +} + +/* optimize csel to cset */ +bool AArch64Ebo::Csel2Cset(Insn &insn, const MapleVector &opnds) { + MOperator opCode = insn.GetMachineOpcode(); + + if (insn.GetOpndNum() == 0) { + return false; + } + + Operand *res = insn.GetResult(0); + + ASSERT(res != nullptr, "expect a register"); + ASSERT(res->IsRegister(), "expect a register"); + /* only do integers */ + RegOperand *reg = static_cast(res); + if ((res == nullptr) || (!reg->IsOfIntClass())) { + return false; + } + /* csel ->cset */ + if ((opCode == MOP_wcselrrrc) || (opCode == MOP_xcselrrrc)) { + Operand *op0 = opnds.at(kInsnSecondOpnd); + Operand *op1 = opnds.at(kInsnThirdOpnd); + AArch64ImmOperand *imm0 = nullptr; + AArch64ImmOperand *imm1 = nullptr; + if (op0->IsImmediate()) { + imm0 = static_cast(op0); + } + if (op1->IsImmediate()) { + imm1 = static_cast(op1); + } + + bool reverse = (imm1 != nullptr) && imm1->IsOne() && + (((imm0 != nullptr) && imm0->IsZero()) || op0->IsZeroRegister()); + if (((imm0 != nullptr) && imm0->IsOne() && (((imm1 != nullptr) && imm1->IsZero()) || op1->IsZeroRegister())) || + reverse) { + if (EBO_DUMP) { + LogInfo::MapleLogger() << "change csel insn :\n"; + insn.Dump(); + } + Operand *result = insn.GetResult(0); + Operand &condOperand = insn.GetOperand(kInsnFourthOpnd); + if (!reverse) { + Insn &newInsn = cgFunc->GetCG()->BuildInstruction( + (opCode == MOP_xcselrrrc) ? MOP_xcsetrc : MOP_wcsetrc, *result, condOperand); + insn.GetBB()->ReplaceInsn(insn, newInsn); + if (EBO_DUMP) { + LogInfo::MapleLogger() << "to cset insn ====>\n"; + newInsn.Dump(); + } + } else { + auto &cond = static_cast(condOperand); + if (!CheckCondCode(cond)) { + return false; + } + CondOperand &reverseCond = a64CGFunc->GetCondOperand(GetReverseCond(cond)); + Insn &newInsn = cgFunc->GetCG()->BuildInstruction( + (opCode == MOP_xcselrrrc) ? MOP_xcsetrc : MOP_wcsetrc, *result, reverseCond); + insn.GetBB()->ReplaceInsn(insn, newInsn); + if (EBO_DUMP) { + LogInfo::MapleLogger() << "to cset insn ====>\n"; + newInsn.Dump(); + } + } + return true; + } + } + return false; +} + +/* Look at an expression that has a constant operand and attempt to simplify the computations. 
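+ * Illustrative rewrites (the exact encodability and operand checks are in the
+ * cases below; "vreg==c" means the register's reaching definition is the constant c):
+ *   orr w0, w1, #0                ===>  mov w0, w1
+ *   add w0, w1, vreg   (vreg==0)  ===>  mov w0, w1
+ *   add w0, w1, vreg   (vreg==c)  ===>  add w0, w1, #c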
*/ +bool AArch64Ebo::SimplifyConstOperand(Insn &insn, const MapleVector &opnds, + const MapleVector &opndInfo) { + BB *bb = insn.GetBB(); + bool result = false; + if (insn.GetOpndNum() < 1) { + return false; + } + ASSERT(opnds.size() > 1, "opnds size must greater than 1"); + Operand *op0 = opnds[kInsnSecondOpnd]; + Operand *op1 = opnds[kInsnThirdOpnd]; + Operand *res = insn.GetResult(0); + CHECK_FATAL(res != nullptr, "null ptr check"); + const AArch64MD *md = &AArch64CG::kMd[static_cast(&insn)->GetMachineOpcode()]; + uint32 opndSize = md->GetOperandSize(); + bool op0IsConstant = op0->IsConstant() && !op1->IsConstant(); + bool op1IsConstant = !op0->IsConstant() && op1->IsConstant(); + bool bothConstant = op0->IsConstant() && op1->IsConstant(); + AArch64ImmOperand *immOpnd = nullptr; + Operand *op = nullptr; + int32 idx0 = kInsnSecondOpnd; + if (op0IsConstant) { + immOpnd = static_cast(op0); + op = op1; + if (op->IsMemoryAccessOperand()) { + op = &(insn.GetOperand(kInsnThirdOpnd)); + } + idx0 = kInsnThirdOpnd; + } else if (op1IsConstant) { + immOpnd = static_cast(op1); + op = op0; + if (op->IsMemoryAccessOperand()) { + op = &(insn.GetOperand(kInsnSecondOpnd)); + } + } else if (bothConstant) { + AArch64ImmOperand *immOpnd0 = static_cast(op0); + AArch64ImmOperand *immOpnd1 = static_cast(op1); + return SimplifyBothConst(*insn.GetBB(), insn, *immOpnd0, *immOpnd1, opndSize); + } + CHECK_FATAL(immOpnd != nullptr, "constant operand required!"); + CHECK_FATAL(op != nullptr, "constant operand required!"); + /* For orr insn and one of the opnd is zero + * orr resOp, imm1, #0 | orr resOp, #0, imm1 + * =======> + * mov resOp, imm1 */ + if (((insn.GetMachineOpcode() == MOP_wiorrri12) || (insn.GetMachineOpcode() == MOP_xiorrri13) || + (insn.GetMachineOpcode() == MOP_xiorri13r) || (insn.GetMachineOpcode() == MOP_wiorri12r)) && immOpnd->IsZero()) { + MOperator mOp = opndSize == k64BitSize ? MOP_xmovrr : MOP_wmovrr; + Insn &newInsn = cgFunc->GetCG()->BuildInstruction(mOp, *res, *op); + bb->ReplaceInsn(insn, newInsn); + return true; + } + /* For the imm is 0. Then replace the insn by a move insn. */ + if (((MOP_xaddrrr <= insn.GetMachineOpcode()) && (insn.GetMachineOpcode() <= MOP_sadd) && immOpnd->IsZero()) || + (op1IsConstant && (MOP_xsubrrr <= insn.GetMachineOpcode()) && (insn.GetMachineOpcode() <= MOP_ssub) && + immOpnd->IsZero())) { + Insn &newInsn = cgFunc->GetCG()->BuildInstruction(opndSize == k64BitSize ? MOP_xmovrr : MOP_wmovrr, + *res, *op); + bb->ReplaceInsn(insn, newInsn); + return true; + } + + if ((insn.GetMachineOpcode() == MOP_xaddrrr) || (insn.GetMachineOpcode() == MOP_waddrrr)) { + if (immOpnd->IsInBitSize(kMaxImmVal24Bits, 0)) { + /* + * ADD Wd|WSP, Wn|WSP, #imm{, shift} ; 32-bit general registers + * ADD Xd|SP, Xn|SP, #imm{, shift} ; 64-bit general registers + * imm : 0 ~ 4095, shift: none, LSL #0, or LSL #12 + * aarch64 assembly takes up to 24-bits, if the lower 12 bits is all 0 + */ + if (immOpnd->IsInBitSize(kMaxImmVal12Bits, 0) || immOpnd->IsInBitSize(kMaxImmVal12Bits, kMaxImmVal12Bits)) { + MOperator mOp = opndSize == k64BitSize ? MOP_xaddrri12 : MOP_waddrri12; + Insn &newInsn = cgFunc->GetCG()->BuildInstruction(mOp, *res, *op, *immOpnd); + bb->ReplaceInsn(insn, newInsn); + result = true; + } + } + } + /* Look for the sequence which can be simpified. 
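+ * e.g. (illustrative; x2 must not be redefined in between and the combined
+ * immediate must still be encodable):
+ *   add x1, x2, #imm0
+ *   add x0, x1, #imm1
+ *   ===>
+ *   add x0, x2, #(imm0 + imm1)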
*/ + if (result || (insn.GetMachineOpcode() == MOP_xaddrri12) || (insn.GetMachineOpcode() == MOP_waddrri12)) { + Insn *prev = opndInfo[idx0]->insn; + if ((prev != nullptr) && ((prev->GetMachineOpcode() == MOP_xaddrri12) || + (prev->GetMachineOpcode() == MOP_waddrri12))) { + OpndInfo *prevInfo0 = opndInfo[idx0]->insnInfo->origOpnd[kInsnSecondOpnd]; + /* if prevop0 has been redefined. skip this optimiztation. */ + if (prevInfo0->redefined) { + return result; + } + Operand &prevOpnd0 = prev->GetOperand(kInsnSecondOpnd); + AArch64ImmOperand &imm0 = static_cast(prev->GetOperand(kInsnThirdOpnd)); + int64_t val = imm0.GetValue() + immOpnd->GetValue(); + AArch64ImmOperand &imm1 = a64CGFunc->CreateImmOperand(val, opndSize, imm0.IsSignedValue()); + if (imm1.IsInBitSize(kMaxImmVal24Bits, 0) && (imm1.IsInBitSize(kMaxImmVal12Bits, 0) || + imm1.IsInBitSize(kMaxImmVal12Bits, kMaxImmVal12Bits))) { + MOperator mOp = (opndSize == k64BitSize ? MOP_xaddrri12 : MOP_waddrri12); + bb->ReplaceInsn(insn, cgFunc->GetCG()->BuildInstruction(mOp, *res, prevOpnd0, imm1)); + result = true; + } + } + } + return result; +} + +AArch64CC_t AArch64Ebo::GetReverseCond(const CondOperand &cond) const { + switch (cond.GetCode()) { + case CC_NE: + return CC_EQ; + case CC_EQ: + return CC_NE; + case CC_LT: + return CC_GE; + case CC_GE: + return CC_LT; + case CC_GT: + return CC_LE; + case CC_LE: + return CC_GT; + default: + CHECK_FATAL(0, "Not support yet."); + } + return kCcLast; +} + +/* return true if cond == CC_LE */ +bool AArch64Ebo::CheckCondCode(const CondOperand &cond) const { + switch (cond.GetCode()) { + case CC_NE: + case CC_EQ: + case CC_LT: + case CC_GE: + case CC_GT: + case CC_LE: + return true; + default: + return false; + } +} + +bool AArch64Ebo::SimplifyBothConst(BB &bb, Insn &insn, const AArch64ImmOperand &immOperand0, + const AArch64ImmOperand &immOperand1, uint32 opndSize) { + MOperator mOp = insn.GetMachineOpcode(); + int64 val = 0; + /* do not support negative const simplify yet */ + if (immOperand0.GetValue() < 0 || immOperand1.GetValue() < 0) { + return false; + } + switch (mOp) { + case MOP_weorrri12: + case MOP_weorrrr: + case MOP_xeorrri13: + case MOP_xeorrrr: + val = immOperand0.GetValue() ^ immOperand1.GetValue(); + break; + case MOP_wandrri12: + case MOP_waddrri24: + case MOP_wandrrr: + case MOP_xandrri13: + case MOP_xandrrr: + val = immOperand0.GetValue() & immOperand1.GetValue(); + break; + case MOP_wiorrri12: + case MOP_wiorri12r: + case MOP_wiorrrr: + case MOP_xiorrri13: + case MOP_xiorri13r: + case MOP_xiorrrr: + val = immOperand0.GetValue() | immOperand1.GetValue(); + break; + default: + return false; + } + Operand *res = insn.GetResult(0); + AArch64ImmOperand *immOperand = &a64CGFunc->CreateImmOperand(val, opndSize, false); + if (!immOperand->IsSingleInstructionMovable()) { + ASSERT(res->IsRegister(), " expect a register operand"); + static_cast(cgFunc)->SplitMovImmOpndInstruction(val, *(static_cast(res))); + bb.RemoveInsn(insn); + } else { + MOperator newmOp = opndSize == k64BitSize ? 
MOP_xmovri64 : MOP_xmovri32; + Insn &newInsn = cgFunc->GetCG()->BuildInstruction(newmOp, *res, *immOperand); + bb.ReplaceInsn(insn, newInsn); + } + return true; +} + +/* Do some special pattern */ +bool AArch64Ebo::SpecialSequence(Insn &insn, const MapleVector &origInfos) { + MOperator opCode = insn.GetMachineOpcode(); + AArch64CGFunc *aarchFunc = static_cast(cgFunc); + switch (opCode) { + /* + * mov R503, R0 + * mov R0, R503 + * ==> mov R0, R0 + */ + case MOP_wmovrr: + case MOP_xmovrr: { + OpndInfo *opndInfo = origInfos[kInsnSecondOpnd]; + if (opndInfo == nullptr) { + return false; + } + Insn *prevInsn = opndInfo->insn; + if ((prevInsn != nullptr) && (prevInsn->GetMachineOpcode() == opCode) && + (prevInsn == insn.GetPreviousMachineInsn()) && + !RegistersIdentical(prevInsn->GetOperand(kInsnFirstOpnd), prevInsn->GetOperand(kInsnSecondOpnd)) && + !RegistersIdentical(insn.GetOperand(kInsnFirstOpnd), insn.GetOperand(kInsnSecondOpnd))) { + Operand *reg1 = insn.GetResult(0); + Operand ®2 = prevInsn->GetOperand(kInsnSecondOpnd); + Insn &newInsn = cgFunc->GetCG()->BuildInstruction(insn.GetMachineOpcode(), *reg1, reg2); + insn.GetBB()->ReplaceInsn(insn, newInsn); + return true; + } + break; + } + /* + * lsl x1, x1, #3 + * add x0, x0, x1 + * ===> add x0, x0, x1, 3({MOP_xaddrrrs, + * {MOPD_Reg64ID,MOPD_Reg64IS,MOPD_Reg64IS,MOPD_BitShift64,MOPD_Undef},0,"add","0,1,2,3", 1, 3}) + */ + case MOP_xaddrrr: + case MOP_waddrrr: { + if (insn.GetResult(0) == nullptr) { + return false; + } + bool is64bits = (insn.GetResult(0)->GetSize() == k64BitSize); + Operand &op0 = insn.GetOperand(kInsnSecondOpnd); + OpndInfo *opndInfo = origInfos.at(kInsnThirdOpnd); + if ((opndInfo != nullptr) && (opndInfo->insn != nullptr)) { + Insn *insn1 = opndInfo->insn; + InsnInfo *insnInfo1 = opndInfo->insnInfo; + CHECK_NULL_FATAL(insnInfo1); + MOperator opc1 = insn1->GetMachineOpcode(); + if ((opc1 == MOP_xlslrri6) || (opc1 == MOP_wlslrri5)) { + /* don't use register if it was redefined. */ + OpndInfo *opndInfo1 = insnInfo1->origOpnd[kInsnSecondOpnd]; + if ((opndInfo1 != nullptr) && opndInfo1->redefined) { + return false; + } + Operand &res = insn.GetOperand(kInsnFirstOpnd); + Operand &opnd1 = insn1->GetOperand(kInsnSecondOpnd); + auto &immOpnd = static_cast(insn1->GetOperand(kInsnThirdOpnd)); + uint32 xLslrriBitLen = 6; + uint32 wLslrriBitLen = 5; + Operand &shiftOpnd = aarchFunc->CreateBitShiftOperand( + BitShiftOperand::kLSL, immOpnd.GetValue(), (opCode == MOP_xlslrri6) ? xLslrriBitLen : wLslrriBitLen); + MOperator mOp = (is64bits ? 
MOP_xaddrrrs : MOP_waddrrrs); + insn.GetBB()->ReplaceInsn(insn, cgFunc->GetCG()->BuildInstruction(mOp, res, op0, + opnd1, shiftOpnd)); + return true; + } + } + break; + } + case MOP_wstr: + case MOP_xstr: + case MOP_wldr: + case MOP_xldr: { + /* + * add x2, x1, imm + * ldr x3, [x2] + * -> ldr x3, [x1, imm] + * --------------------- + * add x2, x1, imm + * str x3, [x2] + * -> str x3, [x1, imm] + */ + CHECK_NULL_FATAL(insn.GetResult(0)); + OpndInfo *opndInfo = origInfos[kInsnSecondOpnd]; + if (insn.IsLoad() && opndInfo == nullptr) { + return false; + } + const AArch64MD *md = &AArch64CG::kMd[static_cast(&insn)->GetMachineOpcode()]; + bool is64bits = md->Is64Bit(); + uint32 size = md->GetOperandSize(); + OpndInfo *baseInfo = nullptr; + MemOperand *memOpnd = nullptr; + if (insn.IsLoad()) { + MemOpndInfo *memInfo = static_cast(opndInfo); + baseInfo = memInfo->GetBaseInfo(); + memOpnd = static_cast(memInfo->opnd); + } else { + Operand *res = insn.GetResult(0); + ASSERT(res->IsMemoryAccessOperand(), "res must be MemoryAccessOperand"); + memOpnd = static_cast(res); + Operand *base = memOpnd->GetBaseRegister(); + ASSERT(base->IsRegister(), "base must be Register"); + baseInfo = GetOpndInfo(*base, -1); + } + + if (static_cast(memOpnd)->GetAddrMode() != AArch64MemOperand::kAddrModeBOi) { + return false; + } + + if ((baseInfo != nullptr) && (baseInfo->insn != nullptr)) { + Insn *insn1 = baseInfo->insn; + if (insn1->GetBB() != insn.GetBB()) { + return false; + } + InsnInfo *insnInfo1 = baseInfo->insnInfo; + CHECK_NULL_FATAL(insnInfo1); + MOperator opc1 = insn1->GetMachineOpcode(); + if ((opc1 == MOP_xaddrri12) || (opc1 == MOP_waddrri12)) { + if (memOpnd->GetOffset() == nullptr) { + return false; + } + AArch64ImmOperand *imm0 = static_cast(memOpnd->GetOffset()); + if (imm0 == nullptr) { + return false; + } + int64 imm0Val = imm0->GetValue(); + Operand &res = insn.GetOperand(kInsnFirstOpnd); + RegOperand *op1 = &static_cast(insn1->GetOperand(kInsnSecondOpnd)); + AArch64ImmOperand &imm1 = static_cast(insn1->GetOperand(kInsnThirdOpnd)); + int64 immVal; + /* don't use register if it was redefined. 
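+      * (the add can only be folded into the ldr/str memory operand while its
+      *  source register still holds the value the add consumed; otherwise only
+      *  the nested two-add pattern handled below may apply)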
*/ + OpndInfo *opndInfo1 = insnInfo1->origOpnd[kInsnSecondOpnd]; + if ((opndInfo1 != nullptr) && opndInfo1->redefined) { + /* + * add x2, x1, imm0, LSL imm1 + * add x2, x2, imm2 + * ldr x3, [x2] + * -> ldr x3, [x1, imm] + * ---------------------------- + * add x2, x1, imm0, LSL imm1 + * add x2, x2, imm2 + * str x3, [x2] + * -> str x3, [x1, imm] + */ + Insn *insn2 = opndInfo1->insn; + if (insn2 == nullptr) { + return false; + } + MOperator opCode2 = insn2->GetMachineOpcode(); + if ((opCode2 != MOP_xaddrri24) && (opCode2 != MOP_waddrri24)) { + return false; + } + auto &res2 = static_cast(insn2->GetOperand(kInsnFirstOpnd)); + auto &base2 = static_cast(insn2->GetOperand(kInsnSecondOpnd)); + auto &immOpnd2 = static_cast(insn2->GetOperand(kInsnThirdOpnd)); + auto &res1 = static_cast(insn1->GetOperand(kInsnFirstOpnd)); + if (RegistersIdentical(res1, *op1) && RegistersIdentical(res1, res2) && + (GetOpndInfo(base2, -1) != nullptr) && !GetOpndInfo(base2, -1)->redefined) { + immVal = imm0Val + imm1.GetValue() + + static_cast(static_cast(immOpnd2.GetValue()) << kMaxImmVal12Bits); + op1 = &base2; + } else { + return false; + } + } else { + immVal = imm0Val + imm1.GetValue(); + } + + /* multiple of 4 and 8 */ + const int multiOfFour = 4; + const int multiOfEight = 8; + is64bits = is64bits && + (!static_cast(insn).CheckRefField(static_cast(kInsnFirstOpnd), false)); + if ((!is64bits && (immVal < kStrLdrImm32UpperBound) && (immVal % multiOfFour == 0)) || + (is64bits && (immVal < kStrLdrImm64UpperBound) && (immVal % multiOfEight == 0))) { + /* Reserved physicalReg beforeRA */ + if (beforeRegAlloc && op1->IsPhysicalRegister()) { + return false; + } + MemOperand &mo = aarchFunc->CreateMemOpnd(*op1, immVal, size); + Insn &ldrInsn = cgFunc->GetCG()->BuildInstruction(opCode, res, mo); + insn.GetBB()->ReplaceInsn(insn, ldrInsn); + return true; + } + } + } + break; + } /* end case MOP_xldr */ + case MOP_xcsetrc: + case MOP_wcsetrc: { + /* i. cmp x0, x1 + * cset w0, EQ ===> cmp x0, x1 + * cmp w0, #0 cset w0, EQ + * cset w0, NE + * + * ii. cmp x0, x1 + * cset w0, EQ ===> cmp x0, x1 + * cmp w0, #0 cset w0, NE + * cset w0, EQ + * + * a.< -1 : 0x20ff25e0 > < 0 > cmp(226) (opnd0: vreg:C105 class: [CC]) (opnd1: vreg:R104 class: [I]) (opnd2: + * vreg:R106 class: [I]) + * b.< -1 : 0x20ff60a0 > < 0 > cset(72) (opnd0: vreg:R101 class: [I]) (opnd1: CC: EQ) + * c.< -1* : 0x20ff3870 > < 0 > cmp(223) (opnd0: vreg:C105 class: [CC]) (opnd1: vreg:R101 class: [I]) (opnd2: + * imm:0) + * d.< * -1 : 0x20ff3908 > < 0 > cset(72) (opnd0: vreg:R107 class: [I]) (opnd1: CC: NE) + * d1.< -1 : 0x20ff3908 > < 0 > * cset(72) (opnd0: vreg:R107 class: [I]) (opnd1: CC: EQ) i, d + * ===> mov R107 R101 ii, a,b,c,d1 ===> a,b,cset Rxx + * NE, c, mov R107 Rxx + */ + auto &cond = static_cast(insn.GetOperand(kInsnSecondOpnd)); + if ((cond.GetCode() != CC_NE) && (cond.GetCode() != CC_EQ)) { + return false; + } + bool reverse = (cond.GetCode() == CC_EQ); + OpndInfo *condInfo = origInfos[kInsnSecondOpnd]; + if ((condInfo != nullptr) && condInfo->insn) { + Insn *cmp1 = condInfo->insn; + if ((cmp1->GetMachineOpcode() == MOP_xcmpri) || (cmp1->GetMachineOpcode() == MOP_wcmpri)) { + InsnInfo *cmpInfo1 = condInfo->insnInfo; + CHECK_FATAL(cmpInfo1 != nullptr, "pointor cmpInfo1 is null"); + OpndInfo *info0 = cmpInfo1->origOpnd[kInsnSecondOpnd]; + /* if R101 was not redefined. 
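+            * (then the 0/1 value produced by the first cset is still live here,
+            *  so the second cmp/cset pair can be replaced by a plain mov from
+            *  that result)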
*/ + if ((info0 != nullptr) && (info0->insnInfo != nullptr) && (info0->insn != nullptr) && + (reverse || !info0->redefined) && cmp1->GetOperand(kInsnThirdOpnd).IsImmediate()) { + Insn *csetInsn = info0->insn; + MOperator opc1 = csetInsn->GetMachineOpcode(); + if (((opc1 == MOP_xcsetrc) || (opc1 == MOP_wcsetrc)) && + static_cast(cmp1->GetOperand(kInsnThirdOpnd)).IsZero()) { + CondOperand &cond1 = static_cast(csetInsn->GetOperand(kInsnSecondOpnd)); + if (!CheckCondCode(cond1)) { + return false; + } + if (EBO_DUMP) { + LogInfo::MapleLogger() << "< === do specical condition optimization, replace insn ===> \n"; + insn.Dump(); + } + Operand *result = insn.GetResult(0); + CHECK_FATAL(result != nullptr, "pointor result is null"); + uint32 size = result->GetSize(); + if (reverse) { + /* After regalloction, we can't create a new register. */ + if (!beforeRegAlloc) { + return false; + } + AArch64CGFunc *aarFunc = static_cast(cgFunc); + Operand &r = aarFunc->CreateRegisterOperandOfType(static_cast(result)->GetRegisterType(), + size / kBitsPerByte); + /* after generate a new vreg, check if the size of DataInfo is big enough */ + EnlargeSpaceForLA(*csetInsn); + CondOperand &cond2 = aarFunc->GetCondOperand(GetReverseCond(cond1)); + Insn &newCset = cgFunc->GetCG()->BuildInstruction( + result->GetSize() == k64BitSize ? MOP_xcsetrc : MOP_wcsetrc, r, cond2); + /* new_cset use the same cond as cset_insn. */ + IncRef(*info0->insnInfo->origOpnd[kInsnSecondOpnd]); + csetInsn->GetBB()->InsertInsnAfter(*csetInsn, newCset); + MOperator mOp = (result->GetSize() == k64BitSize ? MOP_xmovrr : MOP_wmovrr); + Insn &newInsn = cgFunc->GetCG()->BuildInstruction(mOp, *result, r); + insn.GetBB()->ReplaceInsn(insn, newInsn); + if (EBO_DUMP) { + LogInfo::MapleLogger() << "< === with new insn ===> \n"; + newInsn.Dump(); + } + } else { + Operand *result1 = csetInsn->GetResult(0); + MOperator mOp = ((result->GetSize() == k64BitSize) ? MOP_xmovrr : MOP_wmovrr); + Insn &newInsn = cgFunc->GetCG()->BuildInstruction(mOp, *result, *result1); + insn.GetBB()->ReplaceInsn(insn, newInsn); + if (EBO_DUMP) { + LogInfo::MapleLogger() << "< === with new insn ===> \n"; + newInsn.Dump(); + } + } + return true; + } + } + } + } + } /* end case MOP_wcsetrc */ + [[clang::fallthrough]]; + default: + break; + } + return false; +} + +/* + * *iii. 
mov w16, v10.s[1] // FMOV from simd 105 ---> replace_insn + * mov w1, w16 ----->insn + * ==> + * mov w1, v10.s[1] + */ +bool AArch64Ebo::IsMovToSIMDVmov(Insn &insn, const Insn &replaceInsn) const { + if (insn.GetMachineOpcode() == MOP_wmovrr && replaceInsn.GetMachineOpcode() == MOP_xvmovrv) { + insn.SetMOperator(replaceInsn.GetMachineOpcode()); + return true; + } + return false; +} + +bool AArch64Ebo::ChangeLdrMop(Insn &insn, const Operand &opnd) const { + ASSERT(insn.IsLoad(), "expect insn is load in ChangeLdrMop"); + ASSERT(opnd.IsRegister(), "expect opnd is a register in ChangeLdrMop"); + + const RegOperand *regOpnd = static_cast(&opnd); + ASSERT(static_cast(insn.GetOperand(kInsnFirstOpnd)).GetRegisterType() != regOpnd->GetRegisterType(), + "expected matched register type in AArch64Ebo::ChangeLdrMop"); + if (static_cast(insn.GetOperand(kInsnSecondOpnd)).GetIndexRegister()) { + return false; + } + + bool bRet = true; + if (regOpnd->GetRegisterType() == kRegTyFloat) { + switch (insn.GetMachineOpcode()) { + case MOP_wldrb: + insn.SetMOperator(MOP_bldr); + break; + case MOP_wldrh: + insn.SetMOperator(MOP_hldr); + break; + case MOP_wldr: + insn.SetMOperator(MOP_sldr); + break; + case MOP_xldr: + insn.SetMOperator(MOP_dldr); + break; + case MOP_wldli: + insn.SetMOperator(MOP_sldli); + break; + case MOP_xldli: + insn.SetMOperator(MOP_dldli); + break; + case MOP_wldrsb: + case MOP_wldrsh: + default: + bRet = false; + break; + } + } else if (regOpnd->GetRegisterType() == kRegTyInt) { + switch (insn.GetMachineOpcode()) { + case MOP_bldr: + insn.SetMOperator(MOP_wldrb); + break; + case MOP_hldr: + insn.SetMOperator(MOP_wldrh); + break; + case MOP_sldr: + insn.SetMOperator(MOP_wldr); + break; + case MOP_dldr: + insn.SetMOperator(MOP_xldr); + break; + case MOP_sldli: + insn.SetMOperator(MOP_wldli); + break; + case MOP_dldli: + insn.SetMOperator(MOP_xldli); + break; + default: + bRet = false; + break; + } + } else { + ASSERT(false, "Internal error."); + } + return bRet; +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_emitter.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_emitter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fd25a1eaba3759bf1b473e6e018f1084c64dba23 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_emitter.cpp @@ -0,0 +1,531 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#include "riscv64_emitter.h" +#include +#include "riscv64_cgfunc.h" + +namespace { +using namespace maple; +const std::unordered_set kJniNativeFuncList = { + "Landroid_2Fos_2FParcel_3B_7CnativeWriteString_7C_28JLjava_2Flang_2FString_3B_29V_native", + "Landroid_2Fos_2FParcel_3B_7CnativeReadString_7C_28J_29Ljava_2Flang_2FString_3B_native", + "Landroid_2Fos_2FParcel_3B_7CnativeWriteInt_7C_28JI_29V_native", + "Landroid_2Fos_2FParcel_3B_7CnativeReadInt_7C_28J_29I_native", + "Landroid_2Fos_2FParcel_3B_7CnativeWriteInterfaceToken_7C_28JLjava_2Flang_2FString_3B_29V_native", + "Landroid_2Fos_2FParcel_3B_7CnativeEnforceInterface_7C_28JLjava_2Flang_2FString_3B_29V_native" +}; +// map func name to pair +using Func2CodeInsnMap = std::unordered_map>; +Func2CodeInsnMap func2CodeInsnMap { + { "Ljava_2Flang_2FString_3B_7ChashCode_7C_28_29I", + { "maple/mrt/codetricks/arch/arm64/hashCode.s", 29 } }, + { "Ljava_2Flang_2FString_3B_7Cequals_7C_28Ljava_2Flang_2FObject_3B_29Z", + { "maple/mrt/codetricks/arch/arm64/stringEquals.s", 50 } } +}; +constexpr uint32 kQuadInsnCount = 2; +constexpr uint32 kInsnSize = 4; + +void GetMethodLabel(const std::string &methodName, std::string &methodLabel) { + methodLabel = ".Lmethod_desc." + methodName; +} +} + +namespace maplebe { +using namespace maple; + +void AArch64AsmEmitter::EmitRefToMethodDesc(FuncEmitInfo &funcEmitInfo, Emitter &emitter) { + CGFunc &cgFunc = funcEmitInfo.GetCGFunc(); + if (!cgFunc.GetFunction().IsJava()) { + return; + } + std::string methodDescLabel; + GetMethodLabel(cgFunc.GetFunction().GetName(), methodDescLabel); + emitter.Emit("\t.word " + methodDescLabel + "-.\n"); + emitter.IncreaseJavaInsnCount(); +} + +void AArch64AsmEmitter::EmitRefToMethodInfo(FuncEmitInfo &funcEmitInfo, Emitter &emitter) { + CGFunc &cgFunc = funcEmitInfo.GetCGFunc(); + if (cgFunc.GetFunction().GetModule()->IsJavaModule()) { + std::string labelName = ".Label.name." + cgFunc.GetFunction().GetName(); + emitter.Emit("\t.word " + labelName + " - .\n"); + } +} + +/* + * emit java method description which contains address and size of local reference area + * as well as method metadata. 
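+ * Rough layout of what is emitted below (placeholder values in angle brackets):
+ *   .section .rodata
+ *   .align   2
+ *   .Lmethod_desc.<method>:
+ *     .word  .Label.name.<method> - .   // PC-relative reference to the method metadata
+ *     .short <refOffset>                // start of the local reference area on the stack
+ *     .short <refNum>                   // number of local reference slots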
+ */ +void AArch64AsmEmitter::EmitMethodDesc(FuncEmitInfo &funcEmitInfo, Emitter &emitter) { + CGFunc &cgFunc = funcEmitInfo.GetCGFunc(); + if (!cgFunc.GetFunction().IsJava()) { + return; + } + emitter.Emit("\t.section\t.rodata\n"); + emitter.Emit("\t.align\t2\n"); + std::string methodInfoLabel; + GetMethodLabel(cgFunc.GetFunction().GetName(), methodInfoLabel); + emitter.Emit(methodInfoLabel + ":\n"); + EmitRefToMethodInfo(funcEmitInfo, emitter); + /* local reference area */ + AArch64MemLayout *memLayout = static_cast(cgFunc.GetMemlayout()); + int32 refOffset = memLayout->GetRefLocBaseLoc(); + uint32 refNum = memLayout->GetSizeOfRefLocals() / kOffsetAlign; + /* for ea usage */ + AArch64CGFunc &aarchCGFunc = static_cast(cgFunc); + IntrinsiccallNode *cleanEANode = aarchCGFunc.GetCleanEANode(); + if (cleanEANode != nullptr) { + refNum += static_cast(cleanEANode->NumOpnds()); + refOffset -= static_cast(cleanEANode->NumOpnds() * kIntregBytelen); + } + emitter.Emit("\t.short ").Emit(refOffset).Emit("\n"); + emitter.Emit("\t.short ").Emit(refNum).Emit("\n"); +} + +/* the fast_exception_handling lsda */ +void AArch64AsmEmitter::EmitFastLSDA(FuncEmitInfo &funcEmitInfo) { + CGFunc &cgFunc = funcEmitInfo.GetCGFunc(); + AArch64CGFunc &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + + Emitter *emitter = currCG->GetEmitter(); + PUIdx pIdx = currCG->GetMIRModule()->CurFunction()->GetPuidx(); + const std::string &idx = strdup(std::to_string(pIdx).c_str()); + /* + * .word 0xFFFFFFFF + * .word .Label.LTest_3B_7C_3Cinit_3E_7C_28_29V3-func_start_label + */ + (void)emitter->Emit("\t.word 0xFFFFFFFF\n"); + (void)emitter->Emit("\t.word .L." + idx + "__"); + if (aarchCGFunc.NeedCleanup()) { + emitter->Emit(cgFunc.GetCleanupLabel()->GetLabelIdx()); + } else { + ASSERT(!cgFunc.GetExitBBsVec().empty(), "exitbbsvec is empty in AArch64AsmEmitter::EmitFastLSDA"); + emitter->Emit(cgFunc.GetExitBB(0)->GetLabIdx()); + } + emitter->Emit("-.L." 
+ idx + "__") + .Emit(cgFunc.GetStartLabel()->GetLabelIdx()) + .Emit("\n"); + emitter->IncreaseJavaInsnCount(); +} + +/* the normal gcc_except_table */ +void AArch64AsmEmitter::EmitFullLSDA(FuncEmitInfo &funcEmitInfo) { + CGFunc &cgFunc = funcEmitInfo.GetCGFunc(); + AArch64CGFunc &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + EHFunc *ehFunc = cgFunc.GetEHFunc(); + Emitter *emitter = currCG->GetEmitter(); + /* emit header */ + emitter->Emit("\t.align 2\n"); + emitter->Emit("\t.section .gcc_except_table,\"a\",@progbits\n"); + emitter->Emit("\t.align 2\n"); + /* emit LSDA header */ + LSDAHeader *lsdaHeader = ehFunc->GetLSDAHeader(); + emitter->EmitStmtLabel(lsdaHeader->GetLSDALabel()->GetLabelIdx()); + emitter->Emit("\t.byte ").Emit(lsdaHeader->GetLPStartEncoding()).Emit("\n"); + emitter->Emit("\t.byte ").Emit(lsdaHeader->GetTTypeEncoding()).Emit("\n"); + emitter->Emit("\t.uleb128 "); + emitter->EmitLabelPair(lsdaHeader->GetTTypeOffset()); + emitter->EmitStmtLabel(lsdaHeader->GetTTypeOffset().GetStartOffset()->GetLabelIdx()); + /* emit call site table */ + emitter->Emit("\t.byte ").Emit(lsdaHeader->GetCallSiteEncoding()).Emit("\n"); + /* callsite table size */ + emitter->Emit("\t.uleb128 "); + emitter->EmitLabelPair(ehFunc->GetLSDACallSiteTable()->GetCSTable()); + /* callsite start */ + emitter->EmitStmtLabel(ehFunc->GetLSDACallSiteTable()->GetCSTable().GetStartOffset()->GetLabelIdx()); + ehFunc->GetLSDACallSiteTable()->SortCallSiteTable([&aarchCGFunc](LSDACallSite *a, LSDACallSite *b) { + CHECK_FATAL(a != nullptr, "nullptr check"); + CHECK_FATAL(b != nullptr, "nullptr check"); + LabelIDOrder id1 = aarchCGFunc.GetLabelOperand(a->csStart.GetEndOffset()->GetLabelIdx())->GetLabelOrder(); + LabelIDOrder id2 = aarchCGFunc.GetLabelOperand(b->csStart.GetEndOffset()->GetLabelIdx())->GetLabelOrder(); + /* id1 and id2 should not be default value -1u */ + CHECK_FATAL(id1 != 0xFFFFFFFF, "illegal label order assigned"); + CHECK_FATAL(id2 != 0xFFFFFFFF, "illegal label order assigned"); + return id1 < id2; + }); + const MapleVector &callSiteTable = ehFunc->GetLSDACallSiteTable()->GetCallSiteTable(); + for (size_t i = 0; i < callSiteTable.size(); ++i) { + LSDACallSite *lsdaCallSite = callSiteTable[i]; + emitter->Emit("\t.uleb128 "); + emitter->EmitLabelPair(lsdaCallSite->csStart); + + emitter->Emit("\t.uleb128 "); + emitter->EmitLabelPair(lsdaCallSite->csLength); + + if (lsdaCallSite->csLandingPad.GetStartOffset()) { + emitter->Emit("\t.uleb128 "); + emitter->EmitLabelPair(lsdaCallSite->csLandingPad); + } else { + ASSERT(lsdaCallSite->csAction == 0, "csAction error!"); + emitter->Emit("\t.uleb128 "); + if (aarchCGFunc.NeedCleanup()) { + /* if landing pad is 0, we emit this call site as cleanup code */ + LabelPair cleaupCode; + cleaupCode.SetStartOffset(cgFunc.GetStartLabel()); + cleaupCode.SetEndOffset(cgFunc.GetCleanupLabel()); + emitter->EmitLabelPair(cleaupCode); + } else if (cgFunc.GetFunction().IsJava()) { + ASSERT(!cgFunc.GetExitBBsVec().empty(), "exitbbsvec is empty in AArch64Emitter::EmitFullLSDA"); + PUIdx pIdx = cgFunc.GetMirModule().CurFunction()->GetPuidx(); + const std::string &idx = strdup(std::to_string(pIdx).c_str()); + (void)emitter->Emit(".L." + idx).Emit("__").Emit(cgFunc.GetExitBB(0)->GetLabIdx()); + (void)emitter->Emit(" - .L." 
+ idx).Emit("__").Emit(cgFunc.GetStartLabel()->GetLabelIdx()).Emit("\n"); + } else { + emitter->Emit("0\n"); + } + } + emitter->Emit("\t.uleb128 ").Emit(lsdaCallSite->csAction).Emit("\n"); + } + + /* + * quick hack: insert a call site entry for the whole function body. + * this will hand in any pending (uncaught) exception to its caller. Note that + * __gxx_personality_v0 in libstdc++ is coded so that if exception table exists, + * the call site table must have an entry for any possibly raised exception, + * otherwise __cxa_call_terminate will be invoked immediately, thus the caller + * does not get the chance to take charge. + */ + if (aarchCGFunc.NeedCleanup() || cgFunc.GetFunction().IsJava()) { + /* call site for clean-up */ + LabelPair funcStart; + funcStart.SetStartOffset(cgFunc.GetStartLabel()); + funcStart.SetEndOffset(cgFunc.GetStartLabel()); + emitter->Emit("\t.uleb128 "); + emitter->EmitLabelPair(funcStart); + LabelPair funcLength; + funcLength.SetStartOffset(cgFunc.GetStartLabel()); + funcLength.SetEndOffset(cgFunc.GetCleanupLabel()); + emitter->Emit("\t.uleb128 "); + emitter->EmitLabelPair(funcLength); + LabelPair cleaupCode; + cleaupCode.SetStartOffset(cgFunc.GetStartLabel()); + cleaupCode.SetEndOffset(cgFunc.GetCleanupLabel()); + emitter->Emit("\t.uleb128 "); + if (aarchCGFunc.NeedCleanup()) { + emitter->EmitLabelPair(cleaupCode); + } else { + ASSERT(!cgFunc.GetExitBBsVec().empty(), "exitbbsvec is empty in AArch64AsmEmitter::EmitFullLSDA"); + PUIdx pIdx = cgFunc.GetMirModule().CurFunction()->GetPuidx(); + const std::string &idx = strdup(std::to_string(pIdx).c_str()); + (void)emitter->Emit(".L." + idx).Emit("__").Emit(cgFunc.GetExitBB(0)->GetLabIdx()); + (void)emitter->Emit(" - .L." + idx).Emit("__").Emit(cgFunc.GetStartLabel()->GetLabelIdx()).Emit("\n"); + } + emitter->Emit("\t.uleb128 0\n"); + if (!cgFunc.GetFunction().IsJava()) { + /* call site for stack unwind */ + LabelPair unwindStart; + unwindStart.SetStartOffset(cgFunc.GetStartLabel()); + unwindStart.SetEndOffset(cgFunc.GetCleanupLabel()); + emitter->Emit("\t.uleb128 "); + emitter->EmitLabelPair(unwindStart); + LabelPair unwindLength; + unwindLength.SetStartOffset(cgFunc.GetCleanupLabel()); + unwindLength.SetEndOffset(cgFunc.GetEndLabel()); + emitter->Emit("\t.uleb128 "); + emitter->EmitLabelPair(unwindLength); + emitter->Emit("\t.uleb128 0\n"); + emitter->Emit("\t.uleb128 0\n"); + } + } + /* callsite end label */ + emitter->EmitStmtLabel(ehFunc->GetLSDACallSiteTable()->GetCSTable().GetEndOffset()->GetLabelIdx()); + /* tt */ + const LSDAActionTable *lsdaActionTable = ehFunc->GetLSDAActionTable(); + for (size_t i = 0; i < lsdaActionTable->Size(); ++i) { + LSDAAction *lsdaAction = lsdaActionTable->GetActionTable().at(i); + emitter->Emit("\t.byte ").Emit(lsdaAction->GetActionIndex()).Emit("\n"); + emitter->Emit("\t.byte ").Emit(lsdaAction->GetActionFilter()).Emit("\n"); + } + emitter->Emit("\t.align 2\n"); + for (int32 i = ehFunc->GetEHTyTableSize() - 1; i >= 0; i--) { + MIRType *mirType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(ehFunc->GetEHTyTableMember(i)); + MIRTypeKind typeKind = mirType->GetKind(); + if (((typeKind == kTypeScalar) && (mirType->GetPrimType() == PTY_void)) || (typeKind == kTypeStructIncomplete) || + (typeKind == kTypeInterfaceIncomplete)) { + continue; + } + CHECK_FATAL((typeKind == kTypeClass) || (typeKind == kTypeClassIncomplete), "NYI"); + const std::string &tyName = GlobalTables::GetStrTable().GetStringFromStrIdx(mirType->GetNameStrIdx()); + std::string dwRefString(".LDW.ref."); + 
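+      /* build ".LDW.ref.<classinfo prefix><type name> - ." and emit it below as a
+       * 4-byte PC-relative type table entry */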
dwRefString += CLASSINFO_PREFIX_STR; + dwRefString += tyName; + dwRefString += " - ."; + emitter->Emit("\t.4byte " + dwRefString + "\n"); + } + /* end of lsda */ + emitter->EmitStmtLabel(lsdaHeader->GetTTypeOffset().GetEndOffset()->GetLabelIdx()); +} + +void AArch64AsmEmitter::EmitBBHeaderLabel(FuncEmitInfo &funcEmitInfo, const std::string &name, LabelIdx labIdx) { + (void)name; + CGFunc &cgFunc = funcEmitInfo.GetCGFunc(); + AArch64CGFunc &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + Emitter &emitter = *(currCG->GetEmitter()); + LabelOperand &label = aarchCGFunc.GetOrCreateLabelOperand(labIdx); + /* if label order is default value -1, set new order */ + if (label.GetLabelOrder() == 0xFFFFFFFF) { + label.SetLabelOrder(currCG->GetLabelOrderCnt()); + currCG->IncreaseLabelOrderCnt(); + } + PUIdx pIdx = currCG->GetMIRModule()->CurFunction()->GetPuidx(); + const char *puIdx = strdup(std::to_string(pIdx).c_str()); + const std::string &labelName = cgFunc.GetFunction().GetLabelTab()->GetName(labIdx); + if (currCG->GenerateVerboseCG()) { + emitter.Emit(".L.").Emit(puIdx).Emit("__").Emit(labIdx).Emit(":\t//label order ").Emit(label.GetLabelOrder()); + if (!labelName.empty() && labelName.at(0) != '@') { + /* If label name has @ as its first char, it is not from MIR */ + emitter.Emit(", MIR: @").Emit(labelName).Emit("\n"); + } else { + emitter.Emit("\n"); + } + } else { + emitter.Emit(".L.").Emit(puIdx).Emit("__").Emit(labIdx).Emit(":\n"); + } +} + +void AArch64AsmEmitter::EmitJavaInsnAddr(FuncEmitInfo &funcEmitInfo) { + CGFunc &cgFunc = funcEmitInfo.GetCGFunc(); + if (cgFunc.GetFunction().IsJava()) { + Emitter *emitter = cgFunc.GetCG()->GetEmitter(); + /* emit a comment of current address from the begining of java text section */ + std::stringstream ss; + ss << "\n\t// addr: 0x" << std::hex << (emitter->GetJavaInsnCount() * kInsnSize) << "\n"; + cgFunc.GetCG()->GetEmitter()->Emit(ss.str()); + } +} + +void AArch64AsmEmitter::Run(FuncEmitInfo &funcEmitInfo) { + CGFunc &cgFunc = funcEmitInfo.GetCGFunc(); + AArch64CGFunc &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + /* emit header of this function */ + Emitter &emitter = *currCG->GetEmitter(); + // insert for __cxx_global_var_init + if (cgFunc.GetName() == "__cxx_global_var_init") { + (void)emitter.Emit("\t.section\t.init_array,\"aw\"\n"); + (void)emitter.Emit("\t.quad\t").Emit(cgFunc.GetName()).Emit("\n"); + } + emitter.Emit("\n"); + EmitMethodDesc(funcEmitInfo, emitter); + /* emit java code to the java section. */ + if (cgFunc.GetFunction().IsJava()) { + std::string sectionName = namemangler::kMuidJavatextPrefixStr; + (void)emitter.Emit("\t.section ." 
+ sectionName + ",\"ax\"\n"); + } else { + (void)emitter.Emit("\t.text\n"); + } + (void)emitter.Emit("\t.align 2\n"); + MIRSymbol *funcSt = GlobalTables::GetGsymTable().GetSymbolFromStidx(cgFunc.GetFunction().GetStIdx().Idx()); + const std::string &funcName = std::string(cgFunc.GetShortFuncName().c_str()); + + // manually replace function with optimized assembly language + if (CGOptions::IsReplaceASM()) { + auto it = func2CodeInsnMap.find(funcSt->GetName()); + if (it != func2CodeInsnMap.end()) { + std::string optFile = it->second.first; + struct stat buffer; + if (stat(optFile.c_str(), &buffer) == 0) { + std::ifstream codetricksFd(optFile); + if (!codetricksFd.is_open()) { + ERR(kLncErr, " %s open failed!", optFile.c_str()); + LogInfo::MapleLogger() << "wrong" << '\n'; + } else { + std::string contend; + while (getline(codetricksFd, contend)) { + emitter.Emit(contend + "\n"); + } + } + } + emitter.IncreaseJavaInsnCount(it->second.second); +#ifdef EMIT_INSN_COUNT + EmitJavaInsnAddr(funcEmitInfo); +#endif /* ~EMIT_INSN_COUNT */ + return; + } + } + std::string funcStName = funcSt->GetName(); + if (funcSt->GetFunction()->GetAttr(FUNCATTR_weak)) { + (void)emitter.Emit("\t.weak\t" + funcStName + "\n"); + (void)emitter.Emit("\t.hidden\t" + funcStName + "\n"); + } else if (funcSt->GetFunction()->GetAttr(FUNCATTR_local)) { + (void)emitter.Emit("\t.local\t" + funcStName + "\n"); + } else { + bool isExternFunction = false; + (void)emitter.Emit("\t.globl\t").Emit(funcSt->GetName()).Emit("\n"); + if (!currCG->GetMIRModule()->IsCModule() || !isExternFunction) { + (void)emitter.Emit("\t.hidden\t").Emit(funcSt->GetName()).Emit("\n"); + } + } + (void)emitter.Emit("\t.type\t" + funcStName + ", %function\n"); + /* add these messege , solve the simpleperf tool error */ + EmitRefToMethodDesc(funcEmitInfo, emitter); + (void)emitter.Emit(funcStName + ":\n"); + /* if the last insn is call, then insert nop */ + bool found = false; + FOR_ALL_BB_REV(bb, &aarchCGFunc) { + FOR_BB_INSNS_REV(insn, bb) { + if (insn->IsMachineInstruction()) { + if (insn->IsCall()) { + Insn &newInsn = currCG->BuildInstruction(MOP_nop); + bb->InsertInsnAfter(*insn, newInsn); + } + found = true; + break; + } + } + if (found) { + break; + } + } + /* emit instructions */ + FOR_ALL_BB(bb, &aarchCGFunc) { + if (currCG->GenerateVerboseCG()) { + emitter.Emit("# freq:").Emit(bb->GetFrequency()).Emit("\n"); + } + /* emit bb headers */ + if (bb->GetLabIdx() != 0) { + EmitBBHeaderLabel(funcEmitInfo, funcName, bb->GetLabIdx()); + } + + FOR_BB_INSNS(insn, bb) { + insn->Emit(*currCG, emitter); + } + } + if (CGOptions::IsMapleLinker()) { + /* Emit a label for calculating method size */ + (void)emitter.Emit(".Label.end." + funcStName + ":\n"); + } + (void)emitter.Emit("\t.size\t" + funcStName + ", .-").Emit(funcStName + "\n"); + + EHFunc *ehFunc = cgFunc.GetEHFunc(); + /* emit LSDA */ + if (ehFunc != nullptr) { + if (!cgFunc.GetHasProEpilogue()) { + emitter.Emit("\t.word 0x55555555\n"); + emitter.IncreaseJavaInsnCount(); + } else if (ehFunc->NeedFullLSDA()) { + LSDAHeader *lsdaHeader = ehFunc->GetLSDAHeader(); + PUIdx pIdx = emitter.GetCG()->GetMIRModule()->CurFunction()->GetPuidx(); + const std::string &idx = strdup(std::to_string(pIdx).c_str()); + /* .word .Label.lsda_label-func_start_label */ + (void)emitter.Emit("\t.word .L." + idx).Emit("__").Emit(lsdaHeader->GetLSDALabel()->GetLabelIdx()); + (void)emitter.Emit("-.L." 
+ idx).Emit("__").Emit(cgFunc.GetStartLabel()->GetLabelIdx()).Emit("\n"); + emitter.IncreaseJavaInsnCount(); + } else if (ehFunc->NeedFastLSDA()) { + EmitFastLSDA(funcEmitInfo); + } + } + uint32 size = cgFunc.GetFunction().GetSymTab()->GetSymbolTableSize(); + for (size_t i = 0; i < size; ++i) { + MIRSymbol *st = cgFunc.GetFunction().GetSymTab()->GetSymbolFromStIdx(i); + if (st == nullptr) { + continue; + } + MIRStorageClass storageClass = st->GetStorageClass(); + MIRSymKind symKind = st->GetSKind(); + if (storageClass == kScPstatic && symKind == kStConst) { + emitter.Emit("\t.align 2\n" + st->GetName() + ":\n"); + if (st->GetKonst()->GetKind() == kConstStr16Const) { + MIRStr16Const *str16Const = safe_cast(st->GetKonst()); + emitter.EmitStr16Constant(*str16Const); + emitter.Emit("\n"); + continue; + } + if (st->GetKonst()->GetKind() == kConstStrConst) { + MIRStrConst *strConst = safe_cast(st->GetKonst()); + emitter.EmitStrConstant(*strConst); + emitter.Emit("\n"); + continue; + } + + switch (st->GetKonst()->GetType().GetPrimType()) { + case PTY_u32: { + MIRIntConst *intConst = safe_cast(st->GetKonst()); + emitter.Emit("\t.long ").Emit(static_cast(intConst->GetValue())).Emit("\n"); + emitter.IncreaseJavaInsnCount(); + break; + } + case PTY_f32: { + MIRFloatConst *floatConst = safe_cast(st->GetKonst()); + emitter.Emit("\t.word ").Emit(static_cast(floatConst->GetIntValue())).Emit("\n"); + emitter.IncreaseJavaInsnCount(); + break; + } + case PTY_f64: { + MIRDoubleConst *doubleConst = safe_cast(st->GetKonst()); + emitter.Emit("\t.word ").Emit(doubleConst->GetIntLow32()).Emit("\n"); + emitter.IncreaseJavaInsnCount(); + emitter.Emit("\t.word ").Emit(doubleConst->GetIntHigh32()).Emit("\n"); + emitter.IncreaseJavaInsnCount(); + break; + } + default: + ASSERT(false, "NYI"); + break; + } + } + } + + for (auto *st : cgFunc.GetEmitStVec()) { + /* emit switch table only here */ + ASSERT(st->IsReadOnly(), "NYI"); + emitter.Emit("\n"); + emitter.Emit("\t.align 3\n"); + emitter.IncreaseJavaInsnCount(0, true); /* just aligned */ + emitter.Emit(st->GetName() + ":\n"); + MIRAggConst *arrayConst = safe_cast(st->GetKonst()); + CHECK_FATAL(arrayConst != nullptr, "null ptr check"); + PUIdx pIdx = cgFunc.GetMirModule().CurFunction()->GetPuidx(); + const std::string &idx = strdup(std::to_string(pIdx).c_str()); + for (size_t i = 0; i < arrayConst->GetConstVec().size(); i++) { + MIRLblConst *lblConst = safe_cast(arrayConst->GetConstVecItem(i)); + CHECK_FATAL(lblConst != nullptr, "null ptr check"); + (void)emitter.Emit("\t.quad\t.L." 
+ idx).Emit("__").Emit(lblConst->GetValue()); + (void)emitter.Emit(" - " + st->GetName() + "\n"); + emitter.IncreaseJavaInsnCount(kQuadInsnCount); + } + } + + for (const auto &mpPair : cgFunc.GetLabelAndValueMap()) { + LabelOperand &labelOpnd = aarchCGFunc.GetOrCreateLabelOperand(mpPair.first); + labelOpnd.Emit(emitter, nullptr); + emitter.Emit(":\n"); + emitter.Emit("\t.quad ").Emit(mpPair.second).Emit("\n"); + emitter.IncreaseJavaInsnCount(kQuadInsnCount); + } + + if (ehFunc != nullptr && ehFunc->NeedFullLSDA()) { + EmitFullLSDA(funcEmitInfo); + } +#ifdef EMIT_INSN_COUNT + if (cgFunc.GetFunction().IsJava()) { + EmitJavaInsnAddr(funcEmitInfo); + } +#endif /* ~EMIT_INSN_COUNT */ +} + +AnalysisResult *CgDoEmission::Run(CGFunc *cgFunc, CgFuncResultMgr *cgFuncResultMgr) { + (void)cgFuncResultMgr; + ASSERT(cgFunc != nullptr, "null ptr check"); + Emitter *emitter = cgFunc->GetCG()->GetEmitter(); + CHECK_NULL_FATAL(emitter); + AsmFuncEmitInfo funcEmitInfo(*cgFunc); + static_cast(emitter)->Run(funcEmitInfo); + return nullptr; +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_fixshortbranch.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_fixshortbranch.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4342aafde99209c5850c84808f76b2a08460adb3 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_fixshortbranch.cpp @@ -0,0 +1,138 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "riscv64_fixshortbranch.h" +#include "cg.h" +#include "mpl_logging.h" +#include "common_utils.h" + +namespace maplebe { +/* + * Check the distance between the first insn of BB with the lable(targ_labidx) + * and the insn with targ_id. If the distance greater than kShortBRDistance + * return false. + */ +bool AArch64FixShortBranch::DistanceCheck(const BB &bb, LabelIdx targLabIdx, uint32 targId) { + for (auto *tBB : bb.GetSuccs()) { + if (tBB->GetLabIdx() != targLabIdx) { + continue; + } + Insn *tInsn = tBB->GetFirstInsn(); + while (tInsn == nullptr || !tInsn->IsMachineInstruction()) { + if (tInsn == nullptr) { + tBB = tBB->GetNext(); + tInsn = tBB->GetFirstInsn(); + } else { + tInsn = tInsn->GetNext(); + } + } + uint32 tmp = (tInsn->GetId() > targId) ? (tInsn->GetId() - targId) : (targId - tInsn->GetId()); + return (tmp < kShortBRDistance); + } + CHECK_FATAL(false, "CFG error"); +} + +void AArch64FixShortBranch::SetInsnId(){ + uint32 i = 0; + AArch64CGFunc *aarch64CGFunc = static_cast(cgFunc); + FOR_ALL_BB(bb, aarch64CGFunc) { + FOR_BB_INSNS(insn, bb) { + if (!insn->IsMachineInstruction()) { + continue; + } + i += insn->GetAtomicNum(); + insn->SetId(i); + if (insn->GetMachineOpcode() == MOP_adrp_ldr && CGOptions::IsLazyBinding() && !cgFunc->GetCG()->IsLibcore()) { + /* For 1 additional EmitLazyBindingRoutine in lazybinding + * see function AArch64Insn::Emit in file aarch64_insn.cpp + */ + ++i; + } + } + } +} + +/* + * TBZ/TBNZ instruction is generated under -O2, these branch instructions only have a range of +/-32KB. 
+ * If the branch target is not reachable, we split tbz/tbnz into combination of ubfx and cbz/cbnz, which + * will clobber one extra register. With LSRA under -O2, we can use one of the reserved registers R16 for + * that purpose. To save compile time, we do this change when there are more than 32KB / 4 instructions + * in the function. + */ +void AArch64FixShortBranch::FixShortBranches() { + AArch64CGFunc *aarch64CGFunc = static_cast(cgFunc); + SetInsnId(); + FOR_ALL_BB(bb, aarch64CGFunc) { + /* Do a backward scan searching for short branches */ + FOR_BB_INSNS_REV(insn, bb) { + if (!insn->IsMachineInstruction()) { + continue; + } + MOperator thisMop = insn->GetMachineOpcode(); + if (thisMop != MOP_wtbz && thisMop != MOP_wtbnz && thisMop != MOP_xtbz && thisMop != MOP_xtbnz) { + continue; + } + LabelOperand &label = static_cast(insn->GetOperand(kInsnThirdOpnd)); + /* should not be commented out after bug fix */ + if (DistanceCheck(*bb, label.GetLabelIndex(), insn->GetId())) { + continue; + } + auto ® = static_cast(insn->GetOperand(kInsnFirstOpnd)); + ImmOperand &bitSize = aarch64CGFunc->CreateImmOperand(1, k8BitSize, false); + auto &bitPos = static_cast(insn->GetOperand(kInsnSecondOpnd)); + MOperator ubfxOp = MOP_undef; + MOperator cbOp = MOP_undef; + switch (thisMop) { + case MOP_wtbz: + ubfxOp = MOP_wubfxrri5i5; + cbOp = MOP_wcbz; + break; + case MOP_wtbnz: + ubfxOp = MOP_wubfxrri5i5; + cbOp = MOP_wcbnz; + break; + case MOP_xtbz: + ubfxOp = MOP_xubfxrri6i6; + cbOp = MOP_xcbz; + break; + case MOP_xtbnz: + ubfxOp = MOP_xubfxrri6i6; + cbOp = MOP_xcbnz; + break; + default: + break; + } + AArch64RegOperand &tmp = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(R16, (ubfxOp == MOP_wubfxrri5i5) ? k32BitSize : k64BitSize, + kRegTyInt); + (void)bb->InsertInsnAfter(*insn, cg->BuildInstruction(cbOp, tmp, label)); + (void)bb->InsertInsnAfter(*insn, cg->BuildInstruction(ubfxOp, tmp, reg, bitPos, bitSize)); + bb->RemoveInsn(*insn); + break; + } + } +} + +AnalysisResult *CgFixShortBranch::Run(CGFunc *cgFunc, CgFuncResultMgr *cgFuncResultMgr) { + (void)cgFuncResultMgr; + ASSERT(cgFunc != nullptr, "nullptr check"); + MemPool *memPool = memPoolCtrler.NewMemPool("fixShortBranches"); + auto *fixShortBranch = memPool->New(cgFunc); + CHECK_FATAL(fixShortBranch != nullptr, "AArch64FixShortBranch instance create failure"); + fixShortBranch->FixShortBranches(); + memPoolCtrler.DeleteMemPool(memPool); + return nullptr; +} +} /* namespace maplebe */ + diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_global.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_global.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e603e56e82327f7628a7452a995855c4a56e0511 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_global.cpp @@ -0,0 +1,971 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#include "riscv64_global.h" +#include "riscv64_reaching.h" +#include "riscv64_cg.h" +#include "riscv64_live.h" + +namespace maplebe { +using namespace maple; + +void AArch64GlobalOpt::Run() { + if (cgFunc.NumBBs() > kMaxBBNum || cgFunc.GetRD()->GetMaxInsnNO() > kMaxInsnNum) { + return; + } + OptimizeManager optManager(cgFunc); + optManager.Optimize(); + optManager.Optimize(); + optManager.Optimize(); + optManager.Optimize(); + optManager.Optimize(); + optManager.Optimize(); +} + +/* if used Operand in insn is defined by zero in all define insn, return true */ +bool OptimizePattern::OpndDefByZero(Insn &insn, int32 useIdx) const { + ASSERT(insn.GetOperand(useIdx).IsRegister(), "the used Operand must be Register"); + /* Zero Register don't need be defined */ + if (insn.GetOperand(useIdx).IsZeroRegister()) { + return true; + } + + InsnSet defInsns = cgFunc.GetRD()->FindDefForRegOpnd(insn, useIdx); + ASSERT(!defInsns.empty(), "operand must be defined before used"); + for (auto &defInsn : defInsns) { + if (!InsnDefZero(*defInsn)) { + return false; + } + } + return true; +} + +/* if used Operand in insn is defined by one in all define insn, return true */ +bool OptimizePattern::OpndDefByOne(Insn &insn, int32 useIdx) const { + ASSERT(insn.GetOperand(useIdx).IsRegister(), "the used Operand must be Register"); + /* Zero Register don't need be defined */ + if (insn.GetOperand(useIdx).IsZeroRegister()) { + return false; + } + InsnSet defInsns = cgFunc.GetRD()->FindDefForRegOpnd(insn, useIdx); + ASSERT(!defInsns.empty(), "operand must be defined before used"); + for (auto &defInsn : defInsns) { + if (!InsnDefOne(*defInsn)) { + return false; + } + } + return true; +} + +/* if used Operand in insn is defined by one valid bit in all define insn, return true */ +bool OptimizePattern::OpndDefByOneOrZero(Insn &insn, int32 useIdx) const { + if (insn.GetOperand(useIdx).IsZeroRegister()) { + return true; + } + + InsnSet defInsnSet = cgFunc.GetRD()->FindDefForRegOpnd(insn, useIdx); + ASSERT(!defInsnSet.empty(), "Operand must be defined before used"); + + for (auto &defInsn : defInsnSet) { + if (!InsnDefOneOrZero(*defInsn)) { + return false; + } + } + return true; +} + +/* if defined operand(must be first insn currently) in insn is const one, return true */ +bool OptimizePattern::InsnDefOne(Insn &insn) { + MOperator defMop = insn.GetMachineOpcode(); + switch (defMop) { + case MOP_xmovri32: + case MOP_xmovri64: { + Operand &srcOpnd = insn.GetOperand(1); + ASSERT(srcOpnd.IsIntImmediate(), "expects ImmOperand"); + ImmOperand &srcConst = static_cast(srcOpnd); + int64 srcConstValue = srcConst.GetValue(); + if (srcConstValue == 1) { + return true; + } + return false; + } + default: + return false; + } +} + +/* if defined operand(must be first insn currently) in insn is const zero, return true */ +bool OptimizePattern::InsnDefZero(Insn &insn) { + MOperator defMop = insn.GetMachineOpcode(); + switch (defMop) { + case MOP_xmovri32: + case MOP_xmovri64: { + Operand &srcOpnd = insn.GetOperand(kInsnSecondOpnd); + ASSERT(srcOpnd.IsIntImmediate(), "expects ImmOperand"); + ImmOperand &srcConst = static_cast(srcOpnd); + int64 srcConstValue = srcConst.GetValue(); + if (srcConstValue == 0) { + return true; + } + return false; + } + case MOP_xmovrr: + case MOP_wmovrr: + return insn.GetOperand(kInsnSecondOpnd).IsZeroRegister(); + default: + return false; + } +} + +/* if defined operand(must be first insn currently) in insn has only one valid bit, return true */ +bool OptimizePattern::InsnDefOneOrZero(Insn &insn) { + MOperator 
defMop = insn.GetMachineOpcode(); + switch (defMop) { + case MOP_wcsetrc: + case MOP_xcsetrc: + return true; + case MOP_xmovri32: + case MOP_xmovri64: { + Operand &defOpnd = insn.GetOperand(kInsnSecondOpnd); + ASSERT(defOpnd.IsIntImmediate(), "expects ImmOperand"); + auto &defConst = static_cast(defOpnd); + int64 defConstValue = defConst.GetValue(); + if (defConstValue != 0 && defConstValue != 1) { + return false; + } else { + return true; + } + } + case MOP_xmovrr: + case MOP_wmovrr: { + return insn.GetOperand(kInsnSecondOpnd).IsZeroRegister(); + } + case MOP_wlsrrri5: + case MOP_xlsrrri6: { + Operand &opnd2 = insn.GetOperand(kInsnThirdOpnd); + ASSERT(opnd2.IsIntImmediate(), "expects ImmOperand"); + ImmOperand &opndImm = static_cast(opnd2); + int64 shiftBits = opndImm.GetValue(); + if (((defMop == MOP_wlsrrri5) && (shiftBits == k32BitSize - 1)) || + ((defMop == MOP_xlsrrri6) && (shiftBits == k64BitSize - 1))) { + return true; + } else { + return false; + } + } + default: + return false; + } +} + +void OptimizePattern::ReplaceAllUsedOpndWithNewOpnd(const InsnSet &useInsnSet, uint32 regNO, + Operand &newOpnd, bool updateInfo) const { + for (auto useInsn : useInsnSet) { + const AArch64MD *md = &AArch64CG::kMd[static_cast(useInsn)->GetMachineOpcode()]; + uint32 opndNum = useInsn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = useInsn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + if (!regProp->IsRegUse() && !opnd.IsMemoryAccessOperand()) { + continue; + } + + if (opnd.IsRegister() && (static_cast(opnd).GetRegisterNumber() == regNO)) { + useInsn->SetOperand(i, newOpnd); + if (updateInfo) { + cgFunc.GetRD()->InitGenUse(*useInsn->GetBB(), false); + } + } else if (opnd.IsMemoryAccessOperand()) { + AArch64MemOperand &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + RegOperand *index = memOpnd.GetIndexRegister(); + MemOperand *newMem = nullptr; + if (base != nullptr && (base->GetRegisterNumber() == regNO)) { + newMem = static_cast(opnd.Clone(*cgFunc.GetMemoryPool())); + CHECK_FATAL(newMem != nullptr, "null ptr check"); + newMem->SetBaseRegister(*static_cast(&newOpnd)); + useInsn->SetOperand(i, *newMem); + if (updateInfo) { + cgFunc.GetRD()->InitGenUse(*useInsn->GetBB(), false); + } + } + if (index != nullptr && (index->GetRegisterNumber() == regNO)) { + newMem = static_cast(opnd.Clone(*cgFunc.GetMemoryPool())); + CHECK_FATAL(newMem != nullptr, "null ptr check"); + newMem->SetIndexRegister(*static_cast(&newOpnd)); + useInsn->SetOperand(i, *newMem); + if (updateInfo) { + cgFunc.GetRD()->InitGenUse(*useInsn->GetBB(), false); + } + } + } + } + } +} + +bool ForwardPropPattern::CheckCondition(Insn &insn) { + if (!insn.IsMachineInstruction()) { + return false; + } + if ((insn.GetMachineOpcode() != MOP_xmovrr) && (insn.GetMachineOpcode() != MOP_wmovrr)) { + return false; + } + Operand &firstOpnd = insn.GetOperand(kInsnFirstOpnd); + Operand &secondOpnd = insn.GetOperand(kInsnSecondOpnd); + RegOperand &firstRegOpnd = static_cast(firstOpnd); + RegOperand &secondRegOpnd = static_cast(secondOpnd); + uint32 firstRegNO = firstRegOpnd.GetRegisterNumber(); + uint32 secondRegNO = secondRegOpnd.GetRegisterNumber(); + if (firstRegOpnd.IsZeroRegister() || !firstRegOpnd.IsVirtualRegister() || !secondRegOpnd.IsVirtualRegister()) { + return false; + } + firstRegUseInsnSet = cgFunc.GetRD()->FindUseForRegOpnd(insn, firstRegNO, true); + if (firstRegUseInsnSet.empty()) { + return false; + } + InsnSet secondRegDefInsnSet = 
cgFunc.GetRD()->FindDefForRegOpnd(insn, secondRegNO, true); + if (secondRegDefInsnSet.size() != 1 || RegOperand::IsSameReg(firstOpnd, secondOpnd)) { + return false; + } + bool toDoOpt = true; + for (auto useInsn : firstRegUseInsnSet) { + if (!cgFunc.GetRD()->RegIsLiveBetweenInsn(secondRegNO, insn, *useInsn)) { + toDoOpt = false; + break; + } + InsnSet defInsnSet = cgFunc.GetRD()->FindDefForRegOpnd(*useInsn, firstRegNO, true); + if (defInsnSet.size() > 1) { + toDoOpt = false; + break; + } + } + return toDoOpt; +} + +void ForwardPropPattern::Optimize(Insn &insn) { + Operand &firstOpnd = insn.GetOperand(kInsnFirstOpnd); + Operand &secondOpnd = insn.GetOperand(kInsnSecondOpnd); + RegOperand &firstRegOpnd = static_cast(firstOpnd); + uint32 firstRegNO = firstRegOpnd.GetRegisterNumber(); + + for (auto *useInsn : firstRegUseInsnSet) { + const AArch64MD *md = &AArch64CG::kMd[static_cast(useInsn)->GetMachineOpcode()]; + uint32 opndNum = useInsn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = useInsn->GetOperand(i); + const AArch64OpndProp *regProp = md->GetOperand(i); + if (!regProp->IsRegUse() && !opnd.IsMemoryAccessOperand()) { + continue; + } + + if (opnd.IsRegister() && (static_cast(opnd).GetRegisterNumber() == firstRegNO)) { + useInsn->SetOperand(i, secondOpnd); + if (((useInsn->GetMachineOpcode() == MOP_xmovrr) || (useInsn->GetMachineOpcode() == MOP_wmovrr)) && + (static_cast(useInsn->GetOperand(kInsnSecondOpnd)).IsVirtualRegister()) && + (static_cast(useInsn->GetOperand(kInsnFirstOpnd)).IsVirtualRegister())) { + (void)modifiedBB.insert(useInsn->GetBB()); + } + cgFunc.GetRD()->InitGenUse(*useInsn->GetBB(), false); + } else if (opnd.IsMemoryAccessOperand()) { + AArch64MemOperand &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + RegOperand *index = memOpnd.GetIndexRegister(); + MemOperand *newMem = nullptr; + if (base != nullptr && (base->GetRegisterNumber() == firstRegNO)) { + newMem = static_cast(opnd.Clone(*cgFunc.GetMemoryPool())); + CHECK_FATAL(newMem != nullptr, "null ptr check"); + newMem->SetBaseRegister(static_cast(secondOpnd)); + useInsn->SetOperand(i, *newMem); + cgFunc.GetRD()->InitGenUse(*useInsn->GetBB(), false); + } + if ((index != nullptr) && (index->GetRegisterNumber() == firstRegNO)) { + newMem = static_cast(opnd.Clone(*cgFunc.GetMemoryPool())); + CHECK_FATAL(newMem != nullptr, "null ptr check"); + newMem->SetIndexRegister(static_cast(secondOpnd)); + useInsn->SetOperand(i, *newMem); + cgFunc.GetRD()->InitGenUse(*useInsn->GetBB(), false); + } + } + } + } + insn.SetOperand(0, secondOpnd); + cgFunc.GetRD()->UpdateInOut(*insn.GetBB(), true); +} + +void ForwardPropPattern::Init() { + firstRegUseInsnSet.clear(); +} + +void ForwardPropPattern::Run() { + bool secondTime = false; + do { + FOR_ALL_BB(bb, &cgFunc) { + if (bb->IsUnreachable() || (secondTime && modifiedBB.find(bb) == modifiedBB.end())) { + continue; + } + + if (secondTime) { + modifiedBB.erase(bb); + } + + FOR_BB_INSNS(insn, bb) { + Init(); + if (!CheckCondition(*insn)) { + continue; + } + Optimize(*insn); + } + } + secondTime = true; + } while (!modifiedBB.empty()); +} + +bool BackPropPattern::CheckAndGetOpnd(Insn &insn) { + if (!insn.IsMachineInstruction()) { + return false; + } + if ((insn.GetMachineOpcode() != MOP_xmovrr) && (insn.GetMachineOpcode() != MOP_wmovrr)) { + return false; + } + Operand &firstOpnd = insn.GetOperand(kInsnFirstOpnd); + Operand &secondOpnd = insn.GetOperand(kInsnSecondOpnd); + if (RegOperand::IsSameReg(firstOpnd, secondOpnd)) { + return 
false; + } + + firstRegOpnd = &static_cast(firstOpnd); + secondRegOpnd = &static_cast(secondOpnd); + if (firstRegOpnd->IsZeroRegister() || !secondRegOpnd->IsVirtualRegister()) { + return false; + } + firstRegNO = firstRegOpnd->GetRegisterNumber(); + secondRegNO = secondRegOpnd->GetRegisterNumber(); + return true; +} + +bool BackPropPattern::DestOpndHasUseInsns(Insn &insn) { + BB &bb = *insn.GetBB(); + InsnSet useInsnSetOfFirstOpnd; + bool findRes = cgFunc.GetRD()->FindRegUseBetweenInsn(firstRegNO, insn.GetNext(), + bb.GetLastInsn(), useInsnSetOfFirstOpnd); + if ((findRes && useInsnSetOfFirstOpnd.empty()) || + (!findRes && useInsnSetOfFirstOpnd.empty() && !bb.GetLiveOut()->TestBit(firstRegNO))) { + return false; + } + return true; +} + +bool BackPropPattern::DestOpndLiveOutToEHSuccs(Insn &insn) { + BB &bb = *insn.GetBB(); + for (auto ehSucc : bb.GetEhSuccs()) { + if (ehSucc->GetLiveIn()->TestBit(firstRegNO)) { + return true; + } + } + return false; +} + +bool BackPropPattern::CheckSrcOpndDefAndUseInsns(Insn &insn) { + BB &bb = *insn.GetBB(); + /* secondOpnd is defined in other BB */ + std::vector defInsnVec = cgFunc.GetRD()->FindRegDefBetweenInsn(secondRegNO, bb.GetFirstInsn(), insn.GetPrev()); + if (defInsnVec.size() != 1) { + return false; + } + defInsnForSecondOpnd = defInsnVec.back(); + /* part defined */ + if ((defInsnForSecondOpnd->GetMachineOpcode() == MOP_xmovkri16) || + (defInsnForSecondOpnd->GetMachineOpcode() == MOP_wmovkri16)) { + return false; + } + bool findFinish = cgFunc.GetRD()->FindRegUseBetweenInsn(secondRegNO, defInsnForSecondOpnd->GetNext(), + bb.GetLastInsn(), srcOpndUseInsnSet); + if (!findFinish && bb.GetLiveOut()->TestBit(secondRegNO)) { + return false; + } + return true; +} + +bool BackPropPattern::CheckPredefineInsn(Insn &insn) { + if (insn.GetPrev() == defInsnForSecondOpnd) { + return true; + } + std::vector preDefInsnForFirstOpndVec; + BB &bb = *insn.GetBB(); + if (cgFunc.GetRD()->CheckRegGen(bb, firstRegNO)) { + preDefInsnForFirstOpndVec = + cgFunc.GetRD()->FindRegDefBetweenInsn(firstRegNO, defInsnForSecondOpnd->GetNext(), insn.GetPrev()); + } + if (!preDefInsnForFirstOpndVec.empty()) { + return false; + } + /* there is no predefine insn in current bb */ + InsnSet useInsnSetForFirstOpnd; + cgFunc.GetRD()->FindRegUseBetweenInsn(firstRegNO, defInsnForSecondOpnd->GetNext(), insn.GetPrev(), + useInsnSetForFirstOpnd); + if (!useInsnSetForFirstOpnd.empty()) { + return false; + } + return true; +} + +bool BackPropPattern::CheckRedefineInsn(Insn &insn) { + for (auto useInsn : srcOpndUseInsnSet) { + if ((useInsn->GetId() > insn.GetId()) && (insn.GetNext() != useInsn) && + !cgFunc.GetRD()->FindRegDefBetweenInsn(firstRegNO, insn.GetNext(), useInsn->GetPrev()).empty()) { + return false; + } + } + return true; +} + +bool BackPropPattern::CheckCondition(Insn &insn) { + if (!CheckAndGetOpnd(insn)) { + return false; + } + if (!DestOpndHasUseInsns(insn)) { + return false; + } + /* first register must not be live out to eh_succs */ + if (DestOpndLiveOutToEHSuccs(insn)) { + return false; + } + if (!CheckSrcOpndDefAndUseInsns(insn)) { + return false; + } + /* check predefine insn */ + if (!CheckPredefineInsn(insn)) { + return false; + } + /* check redefine insn */ + if (!CheckRedefineInsn(insn)) { + return false; + } + return true; +} + +void BackPropPattern::Optimize(Insn &insn) { + Operand &firstOpnd = insn.GetOperand(kInsnFirstOpnd); + ReplaceAllUsedOpndWithNewOpnd(srcOpndUseInsnSet, secondRegNO, firstOpnd, false); + /* replace define insn */ + const AArch64MD *md = 
&AArch64CG::kMd[static_cast(defInsnForSecondOpnd)->GetMachineOpcode()]; + uint32 opndNum = defInsnForSecondOpnd->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = defInsnForSecondOpnd->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + if (!regProp->IsRegDef() && !opnd.IsMemoryAccessOperand()) { + continue; + } + + if (opnd.IsRegister() && (static_cast(opnd).GetRegisterNumber() == secondRegNO)) { + defInsnForSecondOpnd->SetOperand(i, firstOpnd); + } else if (opnd.IsMemoryAccessOperand()) { + AArch64MemOperand &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + if (base != nullptr && memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi && + (memOpnd.IsPostIndexed() || memOpnd.IsPreIndexed()) && base->GetRegisterNumber() == secondRegNO) { + MemOperand *newMem = static_cast(opnd.Clone(*cgFunc.GetMemoryPool())); + CHECK_FATAL(newMem != nullptr, "null ptr check"); + newMem->SetBaseRegister(static_cast(firstOpnd)); + defInsnForSecondOpnd->SetOperand(i, *newMem); + } + } + } + insn.GetBB()->RemoveInsn(insn); +} + +void BackPropPattern::Init() { + firstRegOpnd = nullptr; + secondRegOpnd = nullptr; + firstRegNO = 0; + secondRegNO = 0; + srcOpndUseInsnSet.clear(); + defInsnForSecondOpnd = nullptr; +} + +void BackPropPattern::Run() { + bool secondTime = false; + std::set modifiedBB; + do { + FOR_ALL_BB(bb, &cgFunc) { + if (bb->IsUnreachable() || (secondTime && modifiedBB.find(bb) == modifiedBB.end())) { + continue; + } + + if (secondTime) { + modifiedBB.erase(bb); + } + + FOR_BB_INSNS_REV(insn, bb) { + Init(); + if (!CheckCondition(*insn)) { + continue; + } + (void)modifiedBB.insert(bb); + Optimize(*insn); + } + cgFunc.GetRD()->UpdateInOut(*bb); + } + secondTime = true; + } while (!modifiedBB.empty()); +} + +bool CmpCsetPattern::CheckCondition(Insn &insn) { + nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr || !insn.IsMachineInstruction()) { + return false; + } + + MOperator firstMop = insn.GetMachineOpcode(); + MOperator secondMop = nextInsn->GetMachineOpcode(); + if (!(((firstMop == MOP_wcmpri) || (firstMop == MOP_xcmpri)) && + ((secondMop == MOP_wcsetrc) || (secondMop == MOP_xcsetrc)))) { + return false; + } + + /* get cmp_first operand */ + cmpFirstOpnd = &(insn.GetOperand(kInsnSecondOpnd)); + /* get cmp second Operand, ImmOperand must be 0 or 1 */ + cmpSecondOpnd = &(insn.GetOperand(kInsnThirdOpnd)); + ASSERT(cmpSecondOpnd->IsIntImmediate(), "expects ImmOperand"); + ImmOperand *cmpConstOpnd = static_cast(cmpSecondOpnd); + cmpConstVal = cmpConstOpnd->GetValue(); + /* get cset first Operand */ + csetFirstOpnd = &(nextInsn->GetOperand(kInsnFirstOpnd)); + if (((cmpConstVal != 0) && (cmpConstVal != 1)) || (cmpFirstOpnd->GetSize() != csetFirstOpnd->GetSize()) || + !OpndDefByOneOrZero(insn, 1)) { + return false; + } + + InsnSet useInsnSet = cgFunc.GetRD()->FindUseForRegOpnd(insn, 0, false); + if (useInsnSet.size() > 1) { + return false; + } + return true; +} + +void CmpCsetPattern::Optimize(Insn &insn) { + Insn *csetInsn = nextInsn; + BB &bb = *insn.GetBB(); + nextInsn = nextInsn->GetNextMachineInsn(); + /* get condition Operand */ + CondOperand &cond = static_cast(csetInsn->GetOperand(kInsnSecondOpnd)); + if (((cmpConstVal == 0) && (cond.GetCode() == CC_NE)) || ((cmpConstVal == 1) && (cond.GetCode() == CC_EQ))) { + if (RegOperand::IsSameReg(*cmpFirstOpnd, *csetFirstOpnd)) { + bb.RemoveInsn(insn); + bb.RemoveInsn(*csetInsn); + } else { + MOperator mopCode = (cmpFirstOpnd->GetSize() == k64BitSize) ? 
MOP_xmovrr : MOP_wmovrr; + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(mopCode, *csetFirstOpnd, *cmpFirstOpnd); + newInsn.SetId(insn.GetId()); + bb.ReplaceInsn(insn, newInsn); + bb.RemoveInsn(*csetInsn); + } + } else if (((cmpConstVal == 1) && (cond.GetCode() == CC_NE)) || + ((cmpConstVal == 0) && (cond.GetCode() == CC_EQ))) { + MOperator mopCode = (cmpFirstOpnd->GetSize() == k64BitSize) ? MOP_xeorrri13 : MOP_weorrri12; + constexpr int64 eorImm = 1; + auto &aarch64CGFunc = static_cast(cgFunc); + ImmOperand &one = aarch64CGFunc.CreateImmOperand(eorImm, k8BitSize, false); + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(mopCode, *csetFirstOpnd, *cmpFirstOpnd, one); + newInsn.SetId(insn.GetId()); + bb.ReplaceInsn(insn, newInsn); + bb.RemoveInsn(*csetInsn); + } + + cgFunc.GetRD()->UpdateInOut(bb, true); +} + +void CmpCsetPattern::Init() { + cmpConstVal = 0; + cmpFirstOpnd = nullptr; + cmpSecondOpnd = nullptr; + csetFirstOpnd = nullptr; +} + +void CmpCsetPattern::Run() { + FOR_ALL_BB(bb, &cgFunc) { + FOR_BB_INSNS(insn, bb) { + Init(); + if (!CheckCondition(*insn)) { + continue; + } + Optimize(*insn); + } + } +} + +AArch64CC_t CselPattern::GetInverseCondCode(const CondOperand &cond) const { + switch (cond.GetCode()) { + case CC_NE: + return CC_EQ; + case CC_EQ: + return CC_NE; + case CC_LT: + return CC_GE; + case CC_GE: + return CC_LT; + case CC_GT: + return CC_LE; + case CC_LE: + return CC_GT; + default: + return kCcLast; + } +} + +bool CselPattern::CheckCondition(Insn &insn) { + MOperator mopCode = insn.GetMachineOpcode(); + if ((mopCode != MOP_xcselrrrc) && (mopCode != MOP_wcselrrrc)) { + return false; + } + return true; +} + +void CselPattern::Optimize(Insn &insn) { + BB &bb = *insn.GetBB(); + Operand &opnd0 = insn.GetOperand(kInsnFirstOpnd); + Operand &cond = insn.GetOperand(kInsnFourthOpnd); + MOperator newMop = ((opnd0.GetSize()) == k64BitSize ? 
MOP_xcsetrc : MOP_wcsetrc); + + if (OpndDefByOne(insn, kInsnSecondOpnd) && OpndDefByZero(insn, kInsnThirdOpnd)) { + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(newMop, opnd0, cond); + newInsn.SetId(insn.GetId()); + bb.ReplaceInsn(insn, newInsn); + cgFunc.GetRD()->InitGenUse(bb, false); + } else if (OpndDefByZero(insn, kInsnSecondOpnd) && OpndDefByOne(insn, kInsnThirdOpnd)) { + CondOperand &originCond = static_cast(cond); + AArch64CC_t inverseCondCode = GetInverseCondCode(originCond); + if (inverseCondCode == kCcLast) { + return; + } + auto &aarchCGFunc = static_cast(cgFunc); + CondOperand &inverseCond = aarchCGFunc.GetCondOperand(inverseCondCode); + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(newMop, opnd0, inverseCond); + newInsn.SetId(insn.GetId()); + bb.ReplaceInsn(insn, newInsn); + cgFunc.GetRD()->InitGenUse(bb, false); + } +} + +void CselPattern::Run() { + FOR_ALL_BB(bb, &cgFunc) { + FOR_BB_INSNS_SAFE(insn, bb, nextInsn) { + if (!CheckCondition(*insn)) { + continue; + } + Optimize(*insn); + } + } +} + +uint32 RedundantUxtPattern::GetInsnValidBit(Insn &insn) { + MOperator mOp = insn.GetMachineOpcode(); + uint32 nRet; + switch (mOp) { + case MOP_wcsetrc: + case MOP_xcsetrc: + nRet = 1; + break; + case MOP_wldrb: + case MOP_wldrsb: + case MOP_wldarb: + case MOP_wldxrb: + case MOP_wldaxrb: + nRet = k8BitSize; + break; + case MOP_wldrh: + case MOP_wldrsh: + case MOP_wldarh: + case MOP_wldxrh: + case MOP_wldaxrh: + nRet = k16BitSize; + break; + case MOP_wmovrr: + case MOP_xmovri32: + case MOP_wldli: + case MOP_wldr: + case MOP_wldp: + case MOP_wldar: + case MOP_wmovkri16: + case MOP_wmovzri16: + case MOP_wmovnri16: + case MOP_wldxr: + case MOP_wldaxr: + case MOP_wldaxp: + case MOP_wcsincrrrc: + case MOP_wcselrrrc: + case MOP_wcsinvrrrc: + nRet = k32BitSize; + break; + default: + nRet = k64BitSize; + break; + } + return nRet; +} + +uint32 RedundantUxtPattern::GetMaximumValidBit(Insn &insn, uint8 index, InsnSet &visitedInsn) const { + InsnSet defInsnSet = cgFunc.GetRD()->FindDefForRegOpnd(insn, index); + ASSERT(!defInsnSet.empty(), "operand must be defined before used"); + + uint32 validBit = 0; + uint32 nMaxValidBit = 0; + for (auto &defInsn : defInsnSet) { + if (visitedInsn.find(defInsn) != visitedInsn.end()) { + continue; + } + + (void)visitedInsn.insert(defInsn); + MOperator mOp = defInsn->GetMachineOpcode(); + if ((mOp == MOP_wmovrr) || (mOp == MOP_xmovrr)) { + validBit = GetMaximumValidBit(*defInsn, 1, visitedInsn); + } else { + validBit = GetInsnValidBit(*defInsn); + } + + nMaxValidBit = nMaxValidBit < validBit ? validBit : nMaxValidBit; + } + return nMaxValidBit; +} + +bool RedundantUxtPattern::CheckCondition(Insn &insn) { + BB &bb = *insn.GetBB(); + InsnSet visitedInsn1; + InsnSet visitedInsn2; + if (!((insn.GetMachineOpcode() == MOP_xuxth32 && + GetMaximumValidBit(insn, kInsnSecondOpnd, visitedInsn1) <= k16BitSize) || + (insn.GetMachineOpcode() == MOP_xuxtb32 && + GetMaximumValidBit(insn, kInsnSecondOpnd, visitedInsn2) <= k8BitSize))) { + return false; + } + + Operand &firstOpnd = insn.GetOperand(kInsnFirstOpnd); + secondOpnd = &(insn.GetOperand(kInsnSecondOpnd)); + if (RegOperand::IsSameReg(firstOpnd, *secondOpnd)) { + bb.RemoveInsn(insn); + /* update in/out */ + cgFunc.GetRD()->UpdateInOut(bb, true); + return false; + } + useInsnSet = cgFunc.GetRD()->FindUseForRegOpnd(insn, 0, false); + RegOperand &firstRegOpnd = static_cast(firstOpnd); + firstRegNO = firstRegOpnd.GetRegisterNumber(); + /* for uxth R1, V501, R1 is parameter register, this can't be optimized. 
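+ * (that is, when the destination of the zero-extension is a physical register, the check below
+ * conservatively bails out instead of rewriting the uses of that register)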
*/ + if (firstRegOpnd.IsPhysicalRegister()) { + return false; + } + + if (useInsnSet.empty()) { + bb.RemoveInsn(insn); + /* update in/out */ + cgFunc.GetRD()->UpdateInOut(bb, true); + return false; + } + + RegOperand *secondRegOpnd = static_cast(secondOpnd); + ASSERT(secondRegOpnd != nullptr, "secondRegOpnd should not be nullptr"); + uint32 secondRegNO = secondRegOpnd->GetRegisterNumber(); + for (auto useInsn : useInsnSet) { + InsnSet defInsnSet = cgFunc.GetRD()->FindDefForRegOpnd(*useInsn, firstRegNO, true); + if ((defInsnSet.size() > 1) || !(cgFunc.GetRD()->RegIsLiveBetweenInsn(secondRegNO, insn, *useInsn))) { + return false; + } + } + return true; +} + +void RedundantUxtPattern::Optimize(Insn &insn) { + BB &bb = *insn.GetBB(); + ReplaceAllUsedOpndWithNewOpnd(useInsnSet, firstRegNO, *secondOpnd, true); + bb.RemoveInsn(insn); + cgFunc.GetRD()->UpdateInOut(bb, true); +} + +void RedundantUxtPattern::Init() { + useInsnSet.clear(); + secondOpnd = nullptr; +} + +void RedundantUxtPattern::Run() { + FOR_ALL_BB(bb, &cgFunc) { + if (bb->IsUnreachable()) { + continue; + } + FOR_BB_INSNS_SAFE(insn, bb, nextInsn) { + Init(); + if (!CheckCondition(*insn)) { + continue; + } + Optimize(*insn); + } + } +} + +bool LocalVarSaveInsnPattern::CheckFirstInsn(Insn &firstInsn) { + MOperator mOp = firstInsn.GetMachineOpcode(); + if (mOp != MOP_xmovrr && mOp != MOP_wmovrr) { + return false; + } + firstInsnSrcOpnd = &(firstInsn.GetOperand(kInsnSecondOpnd)); + RegOperand *firstInsnSrcReg = static_cast(firstInsnSrcOpnd); + if (firstInsnSrcReg->GetRegisterNumber() != R0) { + return false; + } + firstInsnDestOpnd = &(firstInsn.GetOperand(kInsnFirstOpnd)); + RegOperand *firstInsnDestReg = static_cast(firstInsnDestOpnd); + if (firstInsnDestReg->IsPhysicalRegister()) { + return false; + } + return true; +} + +bool LocalVarSaveInsnPattern::CheckSecondInsn() { + MOperator mOp = secondInsn->GetMachineOpcode(); + if (mOp != MOP_wstr && mOp != MOP_xstr) { + return false; + } + secondInsnSrcOpnd = &(secondInsn->GetOperand(kInsnFirstOpnd)); + if (!RegOperand::IsSameReg(*firstInsnDestOpnd, *secondInsnSrcOpnd)) { + return false; + } + /* check memOperand is stack memOperand, and x0 is stored in localref var region */ + secondInsnDestOpnd = &(secondInsn->GetOperand(kInsnSecondOpnd)); + AArch64MemOperand *secondInsnDestMem = static_cast(secondInsnDestOpnd); + RegOperand *baseReg = secondInsnDestMem->GetBaseRegister(); + RegOperand *indexReg = secondInsnDestMem->GetIndexRegister(); + if ((baseReg == nullptr) || !(cgFunc.IsFrameReg(*baseReg)) || (indexReg != nullptr)) { + return false; + } + return true; +} + +bool LocalVarSaveInsnPattern::CheckAndGetUseInsn(Insn &firstInsn) { + InsnSet useInsnSet = cgFunc.GetRD()->FindUseForRegOpnd(firstInsn, kInsnFirstOpnd, false); + if (useInsnSet.size() != 2) { /* secondInsn and another useInsn */ + return false; + } + + /* useInsnSet includes secondInsn and another useInsn */ + for (auto tmpUseInsn : useInsnSet) { + if (tmpUseInsn->GetId() != secondInsn->GetId()) { + useInsn = tmpUseInsn; + break; + } + } + return true; +} + +bool LocalVarSaveInsnPattern::CheckLiveRange(Insn &firstInsn) { + uint32 maxInsnNO = cgFunc.GetRD()->GetMaxInsnNO(); + uint32 useInsnID = useInsn->GetId(); + uint32 defInsnID = firstInsn.GetId(); + uint32 distance = useInsnID > defInsnID ? 
useInsnID - defInsnID : defInsnID - useInsnID; + float liveRangeProportion = static_cast(distance) / maxInsnNO; + /* 0.3 is a balance for real optimization effect */ + if (liveRangeProportion < 0.3) { + return false; + } + return true; +} + +bool LocalVarSaveInsnPattern::CheckCondition(Insn &firstInsn) { + secondInsn = firstInsn.GetNext(); + if (secondInsn == nullptr) { + return false; + } + /* check firstInsn is : mov vreg, R0; */ + if (!CheckFirstInsn(firstInsn)) { + return false; + } + /* check the secondInsn is : str vreg, stackMem */ + if (!CheckSecondInsn()) { + return false; + } + /* find the uses of the vreg */ + if (!CheckAndGetUseInsn(firstInsn)) { + return false; + } + /* simulate live range using insn distance */ + if (!CheckLiveRange(firstInsn)) { + return false; + } + RegOperand *firstInsnDestReg = static_cast(firstInsnDestOpnd); + regno_t firstInsnDestRegNO = firstInsnDestReg->GetRegisterNumber(); + InsnSet defInsnSet = cgFunc.GetRD()->FindDefForRegOpnd(*useInsn, firstInsnDestRegNO, true); + if (defInsnSet.size() != 1) { + return false; + } + ASSERT((*(defInsnSet.begin()))->GetId() == firstInsn.GetId(), "useInsn has only one define Insn : firstInsn"); + /* check whether the stack mem is changed or not */ + AArch64MemOperand *secondInsnDestMem = static_cast(secondInsnDestOpnd); + int64 memOffset = secondInsnDestMem->GetOffsetImmediate()->GetOffsetValue(); + InsnSet memDefInsnSet = cgFunc.GetRD()->FindDefForMemOpnd(*useInsn, memOffset, true); + if (memDefInsnSet.size() != 1) { + return false; + } + if ((*(memDefInsnSet.begin()))->GetId() != secondInsn->GetId()) { + return false; + } + /* check whether has call between use and def */ + if (!cgFunc.GetRD()->HasCallBetweenDefUse(firstInsn, *useInsn)) { + return false; + } + return true; +} + +void LocalVarSaveInsnPattern::Optimize(Insn &insn) { + /* insert ldr insn before useInsn */ + MOperator ldrOpCode = secondInsnSrcOpnd->GetSize() == k64BitSize ? MOP_xldr : MOP_wldr; + Insn &ldrInsn = cgFunc.GetCG()->BuildInstruction(ldrOpCode, *secondInsnSrcOpnd, *secondInsnDestOpnd); + ldrInsn.SetId(useInsn->GetId() - 1); + useInsn->GetBB()->InsertInsnBefore(*useInsn, ldrInsn); + cgFunc.GetRD()->UpdateInOut(*useInsn->GetBB(), true); + secondInsn->SetOperand(kInsnFirstOpnd, *firstInsnSrcOpnd); + BB *saveInsnBB = insn.GetBB(); + saveInsnBB->RemoveInsn(insn); + cgFunc.GetRD()->UpdateInOut(*saveInsnBB, true); +} + +void LocalVarSaveInsnPattern::Init() { + firstInsnSrcOpnd = nullptr; + firstInsnDestOpnd = nullptr; + secondInsnSrcOpnd = nullptr; + secondInsnDestOpnd = nullptr; + useInsn = nullptr; + secondInsn = nullptr; +} + +void LocalVarSaveInsnPattern::Run() { + FOR_ALL_BB(bb, &cgFunc) { + if (bb->IsCleanup()) { + continue; + } + FOR_BB_INSNS(insn, bb) { + if (!insn->IsCall()) { + continue; + } + Insn *firstInsn = insn->GetNextMachineInsn(); + if (firstInsn == nullptr) { + continue; + } + Init(); + if (!CheckCondition(*firstInsn)) { + continue; + } + Optimize(*firstInsn); + } + } +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_ico.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_ico.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4fa80df1d63c30b2b2a56256fb355db482d8d05e --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_ico.cpp @@ -0,0 +1,480 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "riscv64_ico.h" +#include "ico.h" +#include "cg.h" +#include "cg_option.h" +#include "riscv64_isa.h" +#include "riscv64_insn.h" +#include "riscv64_cgfunc.h" + +/* + * This phase implements if-conversion optimization, + * which tries to convert conditional branches into cset/csel instructions + */ +#define ICO_DUMP CG_DEBUG_FUNC(cgFunc) +namespace maplebe { +void AArch64IfConversionOptimizer::InitOptimizePatterns() { + singlePassPatterns.emplace_back(memPool->New(*cgFunc)); +} + +Insn *AArch64ICOPattern::BuildCmpInsn(const Insn &condBr) { + AArch64CGFunc *func = static_cast(cgFunc); + RegOperand ® = static_cast(condBr.GetOperand(0)); + PrimType ptyp = (reg.GetSize() == k64BitSize) ? PTY_u64 : PTY_u32; + ImmOperand &numZero = func->CreateImmOperand(ptyp, 0); + Operand &rflag = func->GetOrCreateRflag(); + MOperator mopCode = (reg.GetSize() == k64BitSize) ? MOP_xcmpri : MOP_wcmpri; + Insn &cmpInsn = func->GetCG()->BuildInstruction(mopCode, rflag, reg, numZero); + return &cmpInsn; +} + +bool AArch64ICOPattern::IsSetInsn(const Insn &insn, Operand *&dest, Operand *&src) const { + MOperator mOpCode = insn.GetMachineOpcode(); + if (mOpCode >= MOP_xmovrr && mOpCode <= MOP_xvmovd) { + dest = &(insn.GetOperand(0)); + src = &(insn.GetOperand(1)); + return true; + } + dest = nullptr; + src = nullptr; + return false; +} + +AArch64CC_t AArch64ICOPattern::Encode(MOperator mOp, bool inverse) const { + switch (mOp) { + case MOP_bmi: + return inverse ? CC_PL : CC_MI; + case MOP_bvc: + return inverse ? CC_VS : CC_VC; + case MOP_bls: + return inverse ? CC_HI : CC_LS; + case MOP_blt: + return inverse ? CC_GE : CC_LT; + case MOP_ble: + return inverse ? CC_GT : CC_LE; + case MOP_beq: + return inverse ? CC_NE : CC_EQ; + case MOP_bne: + return inverse ? CC_EQ : CC_NE; + case MOP_blo: + return inverse ? CC_HS : CC_LO; + case MOP_bpl: + return inverse ? CC_MI : CC_PL; + case MOP_bhs: + return inverse ? CC_LO : CC_HS; + case MOP_bvs: + return inverse ? CC_VC : CC_VS; + case MOP_bhi: + return inverse ? CC_LS : CC_HI; + case MOP_bgt: + return inverse ? CC_LE : CC_GT; + case MOP_bge: + return inverse ? CC_LT : CC_GE; + case MOP_wcbnz: + return inverse ? CC_EQ : CC_NE; + case MOP_xcbnz: + return inverse ? CC_EQ : CC_NE; + case MOP_wcbz: + return inverse ? CC_NE : CC_EQ; + case MOP_xcbz: + return inverse ? CC_NE : CC_EQ; + default: + return kCcLast; + } +} + +Insn *AArch64ICOPattern::BuildCondSet(const Insn &branch, RegOperand ®, bool inverse) { + AArch64CC_t ccCode = Encode(branch.GetMachineOpcode(), inverse); + ASSERT(ccCode != kCcLast, "unknown cond, ccCode can't be kCcLast"); + AArch64CGFunc *func = static_cast(cgFunc); + CondOperand &cond = func->GetCondOperand(ccCode); + MOperator mopCode = (reg.GetSize() == k64BitSize) ? 
MOP_xcsetrc : MOP_wcsetrc; + return &func->GetCG()->BuildInstruction(mopCode, reg, cond); +} + +Insn *AArch64ICOPattern::BuildCondSel(const Insn &branch, MOperator mOp, RegOperand &dst, RegOperand &src1, + RegOperand &src2) { + AArch64CC_t ccCode = Encode(branch.GetMachineOpcode(), false); + ASSERT(ccCode != kCcLast, "unknown cond, ccCode can't be kCcLast"); + CondOperand &cond = static_cast(cgFunc)->GetCondOperand(ccCode); + return &cgFunc->GetCG()->BuildInstruction(mOp, dst, src1, src2, cond); +} + +void AArch64ICOPattern::GenerateInsnForImm(const Insn &branchInsn, Operand &ifDest, Operand &elseDest, + RegOperand &destReg, std::vector &generateInsn) { + ImmOperand &imm1 = static_cast(ifDest); + ImmOperand &imm2 = static_cast(elseDest); + bool inverse = imm1.IsZero() && imm2.IsOne(); + if (inverse || (imm2.IsZero() && imm1.IsOne())) { + Insn *csetInsn = BuildCondSet(branchInsn, destReg, inverse); + ASSERT(csetInsn != nullptr, "build a insn failed"); + generateInsn.emplace_back(csetInsn); + } else if (imm1.GetValue() == imm2.GetValue()) { + MOperator mOp = (destReg.GetSize() == k64BitSize ? MOP_xmovri64 : MOP_xmovri32); + Insn &tempInsn = + cgFunc->GetTheCFG()->GetInsnModifier()->GetCGFunc()->GetCG()->BuildInstruction(mOp, destReg, + imm1); + generateInsn.emplace_back(&tempInsn); + } else { + MOperator mOp = (destReg.GetSize() == k64BitSize ? MOP_xmovri64 : MOP_xmovri32); + RegOperand *tempTarIf = cgFunc->GetTheCFG()->CreateVregFromReg(destReg); + Insn &tempInsnIf = + cgFunc->GetTheCFG()->GetInsnModifier()->GetCGFunc()->GetCG()->BuildInstruction(mOp, *tempTarIf, + imm1); + generateInsn.emplace_back(&tempInsnIf); + + RegOperand *tempTarElse = cgFunc->GetTheCFG()->CreateVregFromReg(destReg); + Insn &tempInsnElse = + cgFunc->GetTheCFG()->GetInsnModifier()->GetCGFunc()->GetCG()->BuildInstruction(mOp, *tempTarElse, + imm2); + generateInsn.emplace_back(&tempInsnElse); + + uint32 dSize = destReg.GetSize(); + bool isIntTy = destReg.IsOfIntClass(); + MOperator mOpCode = isIntTy ? (dSize == k64BitSize ? MOP_xcselrrrc : MOP_wcselrrrc) + : (dSize == k64BitSize ? MOP_dcselrrrc : (dSize == k32BitSize ? + MOP_scselrrrc : MOP_hcselrrrc)); + Insn *cselInsn = BuildCondSel(branchInsn, mOpCode, destReg, *tempTarIf, *tempTarElse); + CHECK_FATAL(cselInsn != nullptr, "build a csel insn failed"); + generateInsn.emplace_back(cselInsn); + } +} + +RegOperand *AArch64ICOPattern::GenerateRegAndTempInsn(Operand &dest, const RegOperand &destReg, + std::vector &generateInsn) { + RegOperand *reg = nullptr; + if (!dest.IsRegister()) { + MOperator mOp = (destReg.GetSize() == k64BitSize ? 
MOP_xmovri64 : MOP_xmovri32); + reg = cgFunc->GetTheCFG()->CreateVregFromReg(destReg); + ImmOperand &tempSrcElse = static_cast(dest); + Insn &tempInsn = + cgFunc->GetTheCFG()->GetInsnModifier()->GetCGFunc()->GetCG()->BuildInstruction(mOp, *reg, + tempSrcElse); + generateInsn.emplace_back(&tempInsn); + return reg; + } else { + return (static_cast(&dest)); + } +} + +void AArch64ICOPattern::GenerateInsnForReg(const Insn &branchInsn, Operand &ifDest, Operand &elseDest, + RegOperand &destReg, std::vector &generateInsn) { + RegOperand *tReg = GenerateRegAndTempInsn(ifDest, destReg, generateInsn); + RegOperand *eReg = GenerateRegAndTempInsn(elseDest, destReg, generateInsn); + + /* mov w0, w1 mov w0, w1 --> mov w0, w1 */ + if (eReg->GetRegisterNumber() == tReg->GetRegisterNumber()) { + uint32 dSize = destReg.GetSize(); + bool srcIsIntTy = tReg->IsOfIntClass(); + bool destIsIntTy = destReg.IsOfIntClass(); + MOperator mOp; + if (dSize == k64BitSize) { + mOp = srcIsIntTy ? (destIsIntTy ? MOP_xmovrr : MOP_xvmovdr) : (destIsIntTy ? MOP_xvmovrd : MOP_xvmovd); + } else { + mOp = srcIsIntTy ? (destIsIntTy ? MOP_wmovrr : MOP_xvmovsr) : (destIsIntTy ? MOP_xvmovrs : MOP_xvmovs); + } + Insn &tempInsnIf = + cgFunc->GetTheCFG()->GetInsnModifier()->GetCGFunc()->GetCG()->BuildInstruction(mOp, destReg, + *tReg); + generateInsn.emplace_back(&tempInsnIf); + } else { + uint32 dSize = destReg.GetSize(); + bool isIntTy = destReg.IsOfIntClass(); + MOperator mOpCode = isIntTy ? (dSize == k64BitSize ? MOP_xcselrrrc : MOP_wcselrrrc) + : (dSize == k64BitSize ? MOP_dcselrrrc : (dSize == k32BitSize ? + MOP_scselrrrc : MOP_hcselrrrc)); + Insn *cselInsn = BuildCondSel(branchInsn, mOpCode, destReg, *tReg, *eReg); + CHECK_FATAL(cselInsn != nullptr, "build a csel insn failed"); + generateInsn.emplace_back(cselInsn); + } +} + +Operand *AArch64ICOPattern::GetDestReg(const std::map &destSrcMap, + const RegOperand &destReg) const { + Operand *dest = nullptr; + for (const auto &destSrcPair : destSrcMap) { + ASSERT(destSrcPair.first->IsRegister(), "opnd must be register"); + RegOperand *destRegInMap = static_cast(destSrcPair.first); + ASSERT(destRegInMap != nullptr, "nullptr check"); + if (destRegInMap->GetRegisterNumber() == destReg.GetRegisterNumber()) { + dest = destSrcPair.second; + break; + } + } + return dest; +} + +bool AArch64ICOPattern::BuildCondMovInsn(BB &cmpBB, const BB &bb, const std::map &ifDestSrcMap, + const std::map &elseDestSrcMap, + bool elseBBIsProcessed, std::vector &generateInsn) { + Insn *branchInsn = cgFunc->GetTheCFG()->FindLastCondBrInsn(cmpBB); + FOR_BB_INSNS_CONST(insn, (&bb)) { + if (!insn->IsMachineInstruction() || insn->IsBranch()) { + continue; + } + Operand *dest = nullptr; + Operand *src = nullptr; + + if (!IsSetInsn(*insn, dest, src)) { + ASSERT(false, "insn check"); + } + ASSERT(dest->IsRegister(), "register check"); + RegOperand *destReg = static_cast(dest); + + Operand *elseDest = GetDestReg(elseDestSrcMap, *destReg); + Operand *ifDest = GetDestReg(ifDestSrcMap, *destReg); + + if (elseBBIsProcessed) { + if (elseDest != nullptr) { + continue; + } + elseDest = dest; + ASSERT(ifDest != nullptr, "null ptr check"); + if (!bb.GetLiveOut()->TestBit(destReg->GetRegisterNumber())) { + continue; + } + } else { + ASSERT(elseDest != nullptr, "null ptr check"); + if (ifDest == nullptr) { + if (!bb.GetLiveOut()->TestBit(destReg->GetRegisterNumber())) { + continue; + } + ifDest = dest; + } + } + + /* generate cset or csel instruction */ + ASSERT(ifDest != nullptr, "null ptr check"); + if (ifDest->IsIntImmediate() 
&& elseDest->IsIntImmediate()) { + GenerateInsnForImm(*branchInsn, *ifDest, *elseDest, *destReg, generateInsn); + } else { + GenerateInsnForReg(*branchInsn, *ifDest, *elseDest, *destReg, generateInsn); + } + } + + return true; +} + +bool AArch64ICOPattern::CheckModifiedRegister(Insn &insn, std::map &destSrcMap, Operand &src, + Operand &dest) const { +/* src was modified in this blcok earlier */ + if (src.IsRegister()) { + RegOperand &srcReg = static_cast(src); + for (const auto &destSrcPair : destSrcMap) { + ASSERT(destSrcPair.first->IsRegister(), "opnd must be register"); + RegOperand *mapSrcReg = static_cast(destSrcPair.first); + if (mapSrcReg->GetRegisterNumber() == srcReg.GetRegisterNumber()) { + return false; + } + } + } + + /* dest register was modified earlier in this block */ + ASSERT(dest.IsRegister(), "opnd must be register"); + RegOperand &destReg = static_cast(dest); + for (const auto &destSrcPair : destSrcMap) { + ASSERT(destSrcPair.first->IsRegister(), "opnd must be register"); + RegOperand *mapSrcReg = static_cast(destSrcPair.first); + if (mapSrcReg->GetRegisterNumber() == destReg.GetRegisterNumber()) { + return false; + } + } + + /* src register is modified later in this block, will not be processed */ + if (src.IsRegister()) { + RegOperand &srcReg = static_cast(src); + if (destReg.IsOfFloatOrSIMDClass() && srcReg.IsZeroRegister()) { + return false; + } + for (Insn *tmpInsn = &insn; tmpInsn != nullptr; tmpInsn = tmpInsn->GetNext()) { + Operand *tmpDest = nullptr; + Operand *tmpSrc = nullptr; + if (IsSetInsn(*tmpInsn, tmpDest, tmpSrc) && tmpDest->Equals(src)) { + ASSERT(tmpDest->IsRegister(), "opnd must be register"); + RegOperand *tmpDestReg = static_cast(tmpDest); + if (srcReg.GetRegisterNumber() == tmpDestReg->GetRegisterNumber()) { + return false; + } + } + } + } + return true; +} + +bool AArch64ICOPattern::CheckCondMoveBB(BB *bb, std::map &destSrcMap, + std::vector &destRegs, Operand *flagOpnd) const { + if (bb == nullptr) { + return false; + } + FOR_BB_INSNS(insn, bb) { + if (!insn->IsMachineInstruction() || insn->IsBranch()) { + continue; + } + Operand *dest = nullptr; + Operand *src = nullptr; + + if (!IsSetInsn(*insn, dest, src)) { + return false; + } + ASSERT(dest != nullptr, "null ptr check"); + ASSERT(src != nullptr, "null ptr check"); + + if (!dest->IsRegister()) { + return false; + } + + if (!src->IsConstant() && !src->IsRegister()) { + return false; + } + + if (flagOpnd != nullptr) { + RegOperand *flagReg = static_cast(flagOpnd); + regno_t flagRegNO = flagReg->GetRegisterNumber(); + if (bb->GetLiveOut()->TestBit(flagRegNO)) { + return false; + } + } + + if (!CheckModifiedRegister(*insn, destSrcMap, *src, *dest)) { + return false; + } + + (void)destSrcMap.insert(std::make_pair(dest, src)); + destRegs.emplace_back(dest); + } + return true; +} + +/* Convert conditional branches into cset/csel instructions */ +bool AArch64ICOPattern::DoOpt(BB &cmpBB, BB *ifBB, BB *elseBB, BB &joinBB) { + Insn *condBr = cgFunc->GetTheCFG()->FindLastCondBrInsn(cmpBB); + ASSERT(condBr != nullptr, "nullptr check"); + Insn *cmpInsn = FindLastCmpInsn(cmpBB); + Operand *flagOpnd = nullptr; + /* for cbnz and cbz institution */ + if (cgFunc->GetTheCFG()->IsCompareAndBranchInsn(*condBr)) { + if (condBr->GetOperand(0).IsZeroRegister()) { + return false; + } + cmpInsn = condBr; + flagOpnd = &(condBr->GetOperand(0)); + } + + /* tbz will not be optimized */ + MOperator mOperator = condBr->GetMachineOpcode(); + if (mOperator == MOP_xtbz || mOperator == MOP_wtbz || mOperator == MOP_xtbnz || 
mOperator == MOP_wtbnz) { + return false; + } + if (cmpInsn == nullptr) { + return false; + } + + std::vector ifDestRegs; + std::vector elseDestRegs; + + std::map ifDestSrcMap; + std::map elseDestSrcMap; + + if (!CheckCondMoveBB(elseBB, elseDestSrcMap, elseDestRegs, flagOpnd) || + (ifBB != nullptr && !CheckCondMoveBB(ifBB, ifDestSrcMap, ifDestRegs, flagOpnd))) { + return false; + } + + size_t count = elseDestRegs.size(); + + for (auto *itr : ifDestRegs) { + bool foundInElse = false; + for (auto *elseItr : elseDestRegs) { + RegOperand *elseDestReg = static_cast(elseItr); + RegOperand *ifDestReg = static_cast(itr); + if (ifDestReg->GetRegisterNumber() == elseDestReg->GetRegisterNumber()) { + foundInElse = true; + break; + } + } + if (foundInElse) { + continue; + } else { + ++count; + } + } + if (count > kThreshold) { + return false; + } + + /* generate insns */ + std::vector elseGenerateInsn; + std::vector ifGenerateInsn; + bool elseBBProcessResult = false; + if (elseBB != nullptr) { + elseBBProcessResult = BuildCondMovInsn(cmpBB, *elseBB, ifDestSrcMap, elseDestSrcMap, false, elseGenerateInsn); + } + bool ifBBProcessResult = false; + if (ifBB != nullptr) { + ifBBProcessResult = BuildCondMovInsn(cmpBB, *ifBB, ifDestSrcMap, elseDestSrcMap, true, ifGenerateInsn); + } + if (!elseBBProcessResult || (ifBB != nullptr && !ifBBProcessResult)) { + return false; + } + + /* insert insn */ + if (cgFunc->GetTheCFG()->IsCompareAndBranchInsn(*condBr)) { + Insn *innerCmpInsn = BuildCmpInsn(*condBr); + cmpBB.InsertInsnBefore(*condBr, *innerCmpInsn); + cmpInsn = innerCmpInsn; + } + + if (elseBB != nullptr) { + cmpBB.SetKind(elseBB->GetKind()); + } else { + cmpBB.SetKind(ifBB->GetKind()); + } + /* delete condBr */ + cmpBB.RemoveInsn(*condBr); + /* Insert goto insn after csel insn. */ + if (cmpBB.GetKind() == BB::kBBGoto || cmpBB.GetKind() == BB::kBBIf) { + if (elseBB != nullptr) { + cmpBB.InsertInsnAfter(*cmpBB.GetLastInsn(), *elseBB->GetLastInsn()); + } else { + cmpBB.InsertInsnAfter(*cmpBB.GetLastInsn(), *ifBB->GetLastInsn()); + } + } + + /* Insert instructions in branches after cmpInsn */ + for (auto itr = elseGenerateInsn.rbegin(); itr != elseGenerateInsn.rend(); ++itr) { + cmpBB.InsertInsnAfter(*cmpInsn, **itr); + } + for (auto itr = ifGenerateInsn.rbegin(); itr != ifGenerateInsn.rend(); ++itr) { + cmpBB.InsertInsnAfter(*cmpInsn, **itr); + } + + /* Remove branches and merge join */ + if (ifBB != nullptr) { + cgFunc->GetTheCFG()->RemoveBB(*ifBB); + } + if (elseBB != nullptr) { + cgFunc->GetTheCFG()->RemoveBB(*elseBB); + } + + if (cmpBB.GetKind() != BB::kBBIf && cmpBB.GetNext() == &joinBB && + !maplebe::CGCFG::InLSDA(joinBB.GetLabIdx(), *cgFunc->GetEHFunc()) && + cgFunc->GetTheCFG()->CanMerge(cmpBB, joinBB)) { + maplebe::CGCFG::MergeBB(cmpBB, joinBB, *cgFunc); + keepPosition = true; + } + return true; +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_immediate.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_immediate.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5e565ae0e6ae2a651ecad3b1b8f3bca883995c57 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_immediate.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "riscv64_immediate.h" +#include "common_utils.h" +#include "mpl_logging.h" + +#include +#include +#include + +namespace maplebe { +static std::set ValidBitmaskImmSet = { +#include "valid_bitmask_imm.txt" +}; + +namespace { +constexpr uint32 kMaxBitTableSize = 5; +#if DEBUG +constexpr uint32 kN16ChunksCheck = 2; +#endif +constexpr std::array bitmaskImmMultTable = { + 0x0000000100000001UL, 0x0001000100010001UL, 0x0101010101010101UL, 0x1111111111111111UL, 0x5555555555555555UL, +}; +}; + +bool IsBitmaskImmediate(uint64 val, uint32 bitLen) { + ASSERT(val != 0, "IsBitmaskImmediate() don's accept 0 or -1"); + ASSERT(static_cast(val) != -1, "IsBitmaskImmediate() don's accept 0 or -1"); + if ((bitLen == k32BitSize) && (static_cast(val) == -1)) { + return false; + } + uint64 val2 = val; + if (bitLen == k32BitSize) { + val2 = (val2 << k32BitSize) | (val2 & ((1ULL << k32BitSize) - 1)); + } + bool expectedOutcome = (ValidBitmaskImmSet.find(val2) != ValidBitmaskImmSet.end()); + + if ((val & 0x1) != 0) { + /* + * we want to work with + * 0000000000000000000000000000000000000000000001100000000000000000 + * instead of + * 1111111111111111111111111111111111111111111110011111111111111111 + */ + val = ~val; + } + + if (bitLen == k32BitSize) { + val = (val << k32BitSize) | (val & ((1ULL << k32BitSize) - 1)); + } + + /* get the least significant bit set and add it to 'val' */ + uint64 tmpVal = val + (val & static_cast(-val)); + + /* now check if tmp is a power of 2 or tmpVal==0. */ + tmpVal = tmpVal & (tmpVal - 1); + if (tmpVal == 0) { + if (!expectedOutcome) { + LogInfo::MapleLogger() << "0x" << std::hex << std::setw(k16ByteSize) << std::setfill('0') << + static_cast(val) << "\n"; + return false; + } + ASSERT(expectedOutcome, "incorrect implementation: not valid value but returning true"); + /* power of two or zero ; return true */ + return true; + } + + int32 p0 = __builtin_ctzll(val); + int32 p1 = __builtin_ctzll(tmpVal); + int64 diff = p1 - p0; + + /* check if diff is a power of two; return false if not. 
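+ * A valid bitmask immediate replicates a run of ones with a power-of-two element size (the
+ * single-run case already returned above, so 2 to 32 bits here); diff is that candidate element size.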
*/ + if ((static_cast(diff) & (static_cast(diff) - 1)) != 0) { + ASSERT(!expectedOutcome, "incorrect implementation: valid value but returning false"); + return false; + } + + int32 logDiff = __builtin_ctzll(diff); + int64 pattern = val & ((1ULL << static_cast(diff)) - 1); +#if DEBUG + bool ret = (val == pattern * bitmaskImmMultTable[kMaxBitTableSize - logDiff]); + ASSERT(expectedOutcome == ret, "incorrect implementation: return value does not match expected outcome"); + return ret; +#else + return val == pattern * bitmaskImmMultTable[kMaxBitTableSize - logDiff]; +#endif +} + +bool IsMoveWidableImmediate(uint64 val, uint32 bitLen) { + if (bitLen == k64BitSize) { + /* 0xHHHH000000000000 or 0x0000HHHH00000000, return true */ + if (((val & ((static_cast(0xffff)) << k48BitSize)) == val) || + ((val & ((static_cast(0xffff)) << k32BitSize)) == val)) { + return true; + } + } else { + /* get lower 32 bits */ + val &= static_cast(0xffffffff); + } + /* 0x00000000HHHH0000 or 0x000000000000HHHH, return true */ + return ((val & ((static_cast(0xffff)) << k16BitSize)) == val || + (val & ((static_cast(0xffff)) << 0)) == val); +} + +bool BetterUseMOVZ(uint64 val) { + int32 n16zerosChunks = 0; + int32 n16onesChunks = 0; + uint64 sa = 0; + /* a 64 bits number is split 4 chunks, each chunk has 16 bits. check each chunk whether is all 1 or is all 0 */ + for (uint64 i = 0; i < k4BitSize; ++i, sa += k16BitSize) { + uint64 chunkVal = (val >> (static_cast(sa))) & 0x0000FFFFUL; + if (chunkVal == 0) { + ++n16zerosChunks; + } else if (chunkVal == 0xFFFFUL) { + ++n16onesChunks; + } + } + /* + * note that since we already check if the value + * can be movable with as a single mov instruction, + * we should not exepct either n_16zeros_chunks>=3 or n_16ones_chunks>=3 + */ + ASSERT(n16zerosChunks <= kN16ChunksCheck, "n16zerosChunks ERR"); + ASSERT(n16onesChunks <= kN16ChunksCheck, "n16onesChunks ERR"); + return (n16zerosChunks >= n16onesChunks); +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_insn.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_insn.cpp new file mode 100644 index 0000000000000000000000000000000000000000..92f70e4d384aa5c0c6d6369b538dda809fcfc519 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_insn.cpp @@ -0,0 +1,1507 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#include "riscv64_insn.h" +#include +#include "riscv64_cg.h" +#include "insn.h" +#include "metadata_layout.h" + +namespace maplebe { +namespace { +constexpr uint32 kClinitInsnCount = 4; +constexpr uint32 kAdrpLdrInsnCount = 2; +constexpr uint32 kLazyBindingRoutineInsnCount = 1; +constexpr uint32 kClinitTailInsnCount = 2; +constexpr uint32 kLazyLdrInsnCount = 2; +constexpr uint32 kLazyLdrStaticInsnCount = 3; +constexpr uint32 kCheckThrowPendingExceptionInsnCount = 5; +constexpr uint32 kArrayClassCacheLoadCount = 3; +} + +uint32 AArch64Insn::GetResultNum() const { + const AArch64MD *md = &AArch64CG::kMd[mOp]; + uint32 resNum = 0; + for (size_t i = 0; i < opnds.size(); ++i) { + if (md->GetOperand(i)->IsDef()) { + ++resNum; + } + } + return resNum; +} + +uint32 AArch64Insn::GetOpndNum() const { + const AArch64MD *md = &AArch64CG::kMd[mOp]; + uint32 srcNum = 0; + for (size_t i = 0; i < opnds.size(); ++i) { + if (md->GetOperand(i)->IsUse()) { + ++srcNum; + } + } + return srcNum; +} + +/* + * intrinsic_compare_swap_int x0, xt, xs, x1, x2, w3, w4, lable1, label2 + * add xt, x1, x2 + * label1: + * ldaxr ws, [xt] + * cmp ws, w3 + * b.ne label2 + * stlxr ws, w4, [xt] + * cbnz ws, label1 + * label2: + * cset x0, eq + */ +void AArch64Insn::EmitCompareAndSwapInt(Emitter &emitter) const { + /* MOP_compare_and_swapI and MOP_compare_and_swapL have 8 operands */ + ASSERT(opnds.size() > kInsnEighthOpnd, "ensure the operands number"); + const MOperator mOp = GetMachineOpcode(); + const AArch64MD *md = &AArch64CG::kMd[mOp]; + Operand *temp0 = opnds[kInsnSecondOpnd]; + Operand *temp1 = opnds[kInsnThirdOpnd]; + Operand *obj = opnds[kInsnFourthOpnd]; + Operand *offset = opnds[kInsnFifthOpnd]; + /* add xt, x1, x2 */ + emitter.Emit("\tadd\t"); + temp0->Emit(emitter, nullptr); + emitter.Emit(", "); + obj->Emit(emitter, nullptr); + emitter.Emit(", "); + offset->Emit(emitter, nullptr); + emitter.Emit("\n"); + Operand *label1 = opnds[kInsnEighthOpnd]; + /* label1: */ + label1->Emit(emitter, nullptr); + emitter.Emit(":\n"); + /* ldaxr ws, [xt] */ + emitter.Emit("\tldaxr\t"); + temp1->Emit(emitter, nullptr); + emitter.Emit(", ["); + temp0->Emit(emitter, nullptr); + emitter.Emit("]\n"); + Operand *expectedValue = opnds[kInsnSixthOpnd]; + OpndProp *expectedValueProp = md->operand[kInsnSixthOpnd]; + /* cmp ws, w3 */ + emitter.Emit("\tcmp\t"); + temp1->Emit(emitter, nullptr); + emitter.Emit(", "); + expectedValue->Emit(emitter, expectedValueProp); + emitter.Emit("\n"); + constexpr uint32 kInsnNinethOpnd = 8; + Operand *label2 = opnds[kInsnNinethOpnd]; + /* b.ne label2 */ + emitter.Emit("\tbne\t"); + label2->Emit(emitter, nullptr); + emitter.Emit("\n"); + Operand *newValue = opnds[kInsnSeventhOpnd]; + /* stlxr ws, w4, [xt] */ + emitter.Emit("\tstlxr\t"); + emitter.Emit(AArch64CG::intRegNames[AArch64CG::kR32List][static_cast(temp1)->GetRegisterNumber()]); + emitter.Emit(", "); + newValue->Emit(emitter, nullptr); + emitter.Emit(", ["); + temp0->Emit(emitter, nullptr); + emitter.Emit("]\n"); + /* cbnz ws, label1 */ + emitter.Emit("\tcbnz\t"); + emitter.Emit(AArch64CG::intRegNames[AArch64CG::kR32List][static_cast(temp1)->GetRegisterNumber()]); + emitter.Emit(", "); + label1->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* label2: */ + label2->Emit(emitter, nullptr); + emitter.Emit(":\n"); + Operand *retVal = opnds[kInsnFirstOpnd]; + /* cset x0, eq */ + emitter.Emit("\tcset\t"); + retVal->Emit(emitter, nullptr); + emitter.Emit(", EQ\n"); +} + +/* + * intrinsic_string_indexof w0, x1, w2, x3, w4, x5, x6, x7, x8, x9, w10, + 
* Label.FIRST_LOOP, Label.STR2_NEXT, Label.STR1_LOOP, + * Label.STR1_NEXT, Label.LAST_WORD, Label.NOMATCH, Label.RET + * cmp w4, w2 + * b.gt .Label.NOMATCH + * sub w2, w2, w4 + * sub w4, w4, #8 + * mov w10, w2 + * uxtw x4, w4 + * uxtw x2, w2 + * add x3, x3, x4 + * add x1, x1, x2 + * neg x4, x4 + * neg x2, x2 + * ldr x5, [x3,x4] + * .Label.FIRST_LOOP: + * ldr x7, [x1,x2] + * cmp x5, x7 + * b.eq .Label.STR1_LOOP + * .Label.STR2_NEXT: + * adds x2, x2, #1 + * b.le .Label.FIRST_LOOP + * b .Label.NOMATCH + * .Label.STR1_LOOP: + * adds x8, x4, #8 + * add x9, x2, #8 + * b.ge .Label.LAST_WORD + * .Label.STR1_NEXT: + * ldr x6, [x3,x8] + * ldr x7, [x1,x9] + * cmp x6, x7 + * b.ne .Label.STR2_NEXT + * adds x8, x8, #8 + * add x9, x9, #8 + * b.lt .Label.STR1_NEXT + * .Label.LAST_WORD: + * ldr x6, [x3] + * sub x9, x1, x4 + * ldr x7, [x9,x2] + * cmp x6, x7 + * b.ne .Label.STR2_NEXT + * add w0, w10, w2 + * b .Label.RET + * .Label.NOMATCH: + * mov w0, #-1 + * .Label.RET: + */ +void AArch64Insn::EmitStringIndexOf(Emitter &emitter) const { + /* MOP_string_indexof has 18 operands */ + ASSERT(opnds.size() == 18, "ensure the operands number"); + Operand *patternLengthOpnd = opnds[kInsnFifthOpnd]; + Operand *srcLengthOpnd = opnds[kInsnThirdOpnd]; + const std::string patternLengthReg = + AArch64CG::intRegNames[AArch64CG::kR64List][static_cast(patternLengthOpnd)->GetRegisterNumber()]; + const std::string srcLengthReg = + AArch64CG::intRegNames[AArch64CG::kR64List][static_cast(srcLengthOpnd)->GetRegisterNumber()]; + /* cmp w4, w2 */ + emitter.Emit("\tcmp\t"); + patternLengthOpnd->Emit(emitter, nullptr); + emitter.Emit(", "); + srcLengthOpnd->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* the 16th operand of MOP_string_indexof is Label.NOMATCH */ + Operand *labelNoMatch = opnds[16]; + /* b.gt Label.NOMATCH */ + emitter.Emit("\tb.gt\t"); + labelNoMatch->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* sub w2, w2, w4 */ + emitter.Emit("\tsub\t"); + srcLengthOpnd->Emit(emitter, nullptr); + emitter.Emit(", "); + srcLengthOpnd->Emit(emitter, nullptr); + emitter.Emit(", "); + patternLengthOpnd->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* sub w4, w4, #8 */ + emitter.Emit("\tsub\t"); + patternLengthOpnd->Emit(emitter, nullptr); + emitter.Emit(", "); + patternLengthOpnd->Emit(emitter, nullptr); + emitter.Emit(", #8\n"); + /* the 10th operand of MOP_string_indexof is w10 */ + Operand *resultTmp = opnds[10]; + /* mov w10, w2 */ + emitter.Emit("\tmov\t"); + resultTmp->Emit(emitter, nullptr); + emitter.Emit(", "); + srcLengthOpnd->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* uxtw x4, w4 */ + emitter.Emit("\tuxtw\t").Emit(patternLengthReg); + emitter.Emit(", "); + patternLengthOpnd->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* uxtw x2, w2 */ + emitter.Emit("\tuxtw\t").Emit(srcLengthReg); + emitter.Emit(", "); + srcLengthOpnd->Emit(emitter, nullptr); + emitter.Emit("\n"); + Operand *patternStringBaseOpnd = opnds[kInsnFourthOpnd]; + /* add x3, x3, x4 */ + emitter.Emit("\tadd\t"); + patternStringBaseOpnd->Emit(emitter, nullptr); + emitter.Emit(", "); + patternStringBaseOpnd->Emit(emitter, nullptr); + emitter.Emit(", ").Emit(patternLengthReg); + emitter.Emit("\n"); + Operand *srcStringBaseOpnd = opnds[kInsnSecondOpnd]; + /* add x1, x1, x2 */ + emitter.Emit("\tadd\t"); + srcStringBaseOpnd->Emit(emitter, nullptr); + emitter.Emit(", "); + srcStringBaseOpnd->Emit(emitter, nullptr); + emitter.Emit(", ").Emit(srcLengthReg); + emitter.Emit("\n"); + /* neg x4, x4 */ + emitter.Emit("\tneg\t").Emit(patternLengthReg); + 
emitter.Emit(", ").Emit(patternLengthReg); + emitter.Emit("\n"); + /* neg x2, x2 */ + emitter.Emit("\tneg\t").Emit(srcLengthReg); + emitter.Emit(", ").Emit(srcLengthReg); + emitter.Emit("\n"); + Operand *first = opnds[kInsnSixthOpnd]; + /* ldr x5, [x3,x4] */ + emitter.Emit("\tldr\t"); + first->Emit(emitter, nullptr); + emitter.Emit(", ["); + patternStringBaseOpnd->Emit(emitter, nullptr); + emitter.Emit(",").Emit(patternLengthReg); + emitter.Emit("]\n"); + /* the 11th operand of MOP_string_indexof is Label.FIRST_LOOP */ + Operand *labelFirstLoop = opnds[11]; + /* .Label.FIRST_LOOP: */ + labelFirstLoop->Emit(emitter, nullptr); + emitter.Emit(":\n"); + /* the 7th operand of MOP_string_indexof is x7 */ + Operand *ch2 = opnds[7]; + /* ldr x7, [x1,x2] */ + emitter.Emit("\tldr\t"); + ch2->Emit(emitter, nullptr); + emitter.Emit(", ["); + srcStringBaseOpnd->Emit(emitter, nullptr); + emitter.Emit(",").Emit(srcLengthReg); + emitter.Emit("]\n"); + /* cmp x5, x7 */ + emitter.Emit("\tcmp\t"); + first->Emit(emitter, nullptr); + emitter.Emit(", "); + ch2->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* the 13th operand of MOP_string_indexof is Label.STR1_LOOP */ + Operand *labelStr1Loop = opnds[13]; + /* b.eq .Label.STR1_LOOP */ + emitter.Emit("\tb.eq\t"); + labelStr1Loop->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* the 12th operand of MOP_string_indexof is Label.STR2_NEXT */ + Operand *labelStr2Next = opnds[12]; + /* .Label.STR2_NEXT: */ + labelStr2Next->Emit(emitter, nullptr); + emitter.Emit(":\n"); + /* adds x2, x2, #1 */ + emitter.Emit("\tadds\t").Emit(srcLengthReg); + emitter.Emit(", ").Emit(srcLengthReg); + emitter.Emit(", #1\n"); + /* b.le .Label.FIRST_LOOP */ + emitter.Emit("\tb.le\t"); + labelFirstLoop->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* b .Label.NOMATCH */ + emitter.Emit("\tb\t"); + labelNoMatch->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* .Label.STR1_LOOP: */ + labelStr1Loop->Emit(emitter, nullptr); + emitter.Emit(":\n"); + /* the 8th operand of MOP_string_indexof is x8 */ + Operand *tmp1 = opnds[8]; + /* adds x8, x4, #8 */ + emitter.Emit("\tadds\t"); + tmp1->Emit(emitter, nullptr); + emitter.Emit(", ").Emit(patternLengthReg); + emitter.Emit(", #8\n"); + /* the 9th operand of MOP_string_indexof is x9 */ + Operand *tmp2 = opnds[9]; + /* add x9, x2, #8 */ + emitter.Emit("\tadd\t"); + tmp2->Emit(emitter, nullptr); + emitter.Emit(", ").Emit(srcLengthReg); + emitter.Emit(", #8\n"); + /* the 15th operand of MOP_string_indexof is Label.LAST_WORD */ + Operand *labelLastWord = opnds[15]; + /* b.ge .Label.LAST_WORD */ + emitter.Emit("\tb.ge\t"); + labelLastWord->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* the 14th operand of MOP_string_indexof is Label.STR1_NEXT */ + Operand *labelStr1Next = opnds[14]; + /* .Label.STR1_NEXT: */ + labelStr1Next->Emit(emitter, nullptr); + emitter.Emit(":\n"); + /* the 6th operand of MOP_string_indexof is x6 */ + Operand *ch1 = opnds[6]; + /* ldr x6, [x3,x8] */ + emitter.Emit("\tldr\t"); + ch1->Emit(emitter, nullptr); + emitter.Emit(", ["); + patternStringBaseOpnd->Emit(emitter, nullptr); + emitter.Emit(","); + tmp1->Emit(emitter, nullptr); + emitter.Emit("]\n"); + /* ldr x7, [x1,x9] */ + emitter.Emit("\tldr\t"); + ch2->Emit(emitter, nullptr); + emitter.Emit(", ["); + srcStringBaseOpnd->Emit(emitter, nullptr); + emitter.Emit(","); + tmp2->Emit(emitter, nullptr); + emitter.Emit("]\n"); + /* cmp x6, x7 */ + emitter.Emit("\tcmp\t"); + ch1->Emit(emitter, nullptr); + emitter.Emit(", "); + ch2->Emit(emitter, nullptr); + emitter.Emit("\n"); 
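  /*
   * Editor's note (illustrative, not part of the original change): the instructions being
   * emitted here form the inner match loop of the expansion listed in the comment at the
   * top of this function. In rough C-like pseudo-code with hypothetical helpers:
   *
   *   while (wordsRemain()) {                          // STR1_NEXT
   *     if (load8(pattern, off1) != load8(src, off2)) {
   *       goto STR2_NEXT;                              // mismatch: slide the search window by one
   *     }
   *     off1 += 8; off2 += 8;
   *   }
   *   compareFinalWord();                              // LAST_WORD re-checks the trailing word
   */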
+ /* b.ne .Label.STR2_NEXT */ + emitter.Emit("\tb.ne\t"); + labelStr2Next->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* adds x8, x8, #8 */ + emitter.Emit("\tadds\t"); + tmp1->Emit(emitter, nullptr); + emitter.Emit(", "); + tmp1->Emit(emitter, nullptr); + emitter.Emit(", #8\n"); + /* add x9, x9, #8 */ + emitter.Emit("\tadd\t"); + tmp2->Emit(emitter, nullptr); + emitter.Emit(", "); + tmp2->Emit(emitter, nullptr); + emitter.Emit(", #8\n"); + /* b.lt .Label.STR1_NEXT */ + emitter.Emit("\tb.lt\t"); + labelStr1Next->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* .Label.LAST_WORD: */ + labelLastWord->Emit(emitter, nullptr); + emitter.Emit(":\n"); + /* ldr x6, [x3] */ + emitter.Emit("\tldr\t"); + ch1->Emit(emitter, nullptr); + emitter.Emit(", ["); + patternStringBaseOpnd->Emit(emitter, nullptr); + emitter.Emit("]\n"); + /* sub x9, x1, x4 */ + emitter.Emit("\tsub\t"); + tmp2->Emit(emitter, nullptr); + emitter.Emit(", "); + srcStringBaseOpnd->Emit(emitter, nullptr); + emitter.Emit(", ").Emit(patternLengthReg); + emitter.Emit("\n"); + /* ldr x7, [x9,x2] */ + emitter.Emit("\tldr\t"); + ch2->Emit(emitter, nullptr); + emitter.Emit(", ["); + tmp2->Emit(emitter, nullptr); + emitter.Emit(", ").Emit(srcLengthReg); + emitter.Emit("]\n"); + /* cmp x6, x7 */ + emitter.Emit("\tcmp\t"); + ch1->Emit(emitter, nullptr); + emitter.Emit(", "); + ch2->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* b.ne .Label.STR2_NEXT */ + emitter.Emit("\tb.ne\t"); + labelStr2Next->Emit(emitter, nullptr); + emitter.Emit("\n"); + Operand *retVal = opnds[kInsnFirstOpnd]; + /* add w0, w10, w2 */ + emitter.Emit("\tadd\t"); + retVal->Emit(emitter, nullptr); + emitter.Emit(", "); + resultTmp->Emit(emitter, nullptr); + emitter.Emit(", "); + srcLengthOpnd->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* the 17th operand of MOP_string_indexof Label.ret */ + Operand *labelRet = opnds[17]; + /* b .Label.ret */ + emitter.Emit("\tb\t"); + labelRet->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* .Label.NOMATCH: */ + labelNoMatch->Emit(emitter, nullptr); + emitter.Emit(":\n"); + /* mov w0, #-1 */ + emitter.Emit("\tmov\t"); + retVal->Emit(emitter, nullptr); + emitter.Emit(", #-1\n"); + /* .Label.ret: */ + labelRet->Emit(emitter, nullptr); + emitter.Emit(":\n"); +} + +/* + * intrinsic_get_add_int w0, xt, wt, ws, x1, x2, w3, label + * add xt, x1, x2 + * label: + * ldaxr w0, [xt] + * add wt, w0, w3 + * stlxr ws, wt, [xt] + * cbnz ws, label + */ +void AArch64Insn::EmitGetAndAddInt(Emitter &emitter) const { + ASSERT(opnds.size() > kInsnEighthOpnd, "ensure the oprands number"); + emitter.Emit("\t//\tstart of Unsafe.getAndAddInt.\n"); + Operand *tempOpnd0 = opnds[kInsnSecondOpnd]; + Operand *tempOpnd1 = opnds[kInsnThirdOpnd]; + Operand *tempOpnd2 = opnds[kInsnFourthOpnd]; + Operand *objOpnd = opnds[kInsnFifthOpnd]; + Operand *offsetOpnd = opnds[kInsnSixthOpnd]; + Operand *deltaOpnd = opnds[kInsnSeventhOpnd]; + Operand *labelOpnd = opnds[kInsnEighthOpnd]; + /* emit add. */ + emitter.Emit("\t").Emit("add").Emit("\t"); + tempOpnd0->Emit(emitter, nullptr); + emitter.Emit(", "); + objOpnd->Emit(emitter, nullptr); + emitter.Emit(", "); + offsetOpnd->Emit(emitter, nullptr); + emitter.Emit("\n"); + /* emit label. 
*/ + labelOpnd->Emit(emitter, nullptr); + emitter.Emit(":\n"); + Operand *retVal = opnds[kInsnFirstOpnd]; + const MOperator mOp = GetMachineOpcode(); + const AArch64MD *md = &AArch64CG::kMd[mOp]; + OpndProp *retProp = md->operand[kInsnFirstOpnd]; + /* emit ldaxr */ + emitter.Emit("\t").Emit("ldaxr").Emit("\t"); + retVal->Emit(emitter, retProp); + emitter.Emit(", ["); + tempOpnd0->Emit(emitter, nullptr); + emitter.Emit("]\n"); + /* emit add. */ + emitter.Emit("\t").Emit("add").Emit("\t"); + tempOpnd1->Emit(emitter, retProp); + emitter.Emit(", "); + retVal->Emit(emitter, retProp); + emitter.Emit(", "); + deltaOpnd->Emit(emitter, retProp); + emitter.Emit("\n"); + /* emit stlxr. */ + emitter.Emit("\t").Emit("stlxr").Emit("\t"); + tempOpnd2->Emit(emitter, nullptr); + emitter.Emit(", "); + tempOpnd1->Emit(emitter, retProp); + emitter.Emit(", ["); + tempOpnd0->Emit(emitter, nullptr); + emitter.Emit("]\n"); + /* emit cbnz. */ + emitter.Emit("\t").Emit("cbnz").Emit("\t"); + tempOpnd2->Emit(emitter, nullptr); + emitter.Emit(", "); + labelOpnd->Emit(emitter, nullptr); + emitter.Emit("\n"); + emitter.Emit("\t//\tend of Unsafe.getAndAddInt.\n"); +} + +/* + * intrinsic_get_set_int w0, xt, ws, x1, x2, w3, label + * add xt, x1, x2 + * label: + * ldaxr w0, [xt] + * stlxr ws, w3, [xt] + * cbnz ws, label + */ +void AArch64Insn::EmitGetAndSetInt(Emitter &emitter) const { + /* MOP_get_and_setI and MOP_get_and_setL have 7 operands */ + ASSERT(opnds.size() > kInsnSeventhOpnd, "ensure the operands number"); + Operand *tempOpnd0 = opnds[kInsnSecondOpnd]; + Operand *tempOpnd1 = opnds[kInsnThirdOpnd]; + Operand *objOpnd = opnds[kInsnFourthOpnd]; + Operand *offsetOpnd = opnds[kInsnFifthOpnd]; + /* add x1, x1, x2 */ + emitter.Emit("\tadd\t"); + tempOpnd0->Emit(emitter, nullptr); + emitter.Emit(", "); + objOpnd->Emit(emitter, nullptr); + emitter.Emit(", "); + offsetOpnd->Emit(emitter, nullptr); + emitter.Emit("\n"); + Operand *labelOpnd = opnds[kInsnSeventhOpnd]; + /* label: */ + labelOpnd->Emit(emitter, nullptr); + emitter.Emit(":\n"); + Operand *retVal = opnds[kInsnFirstOpnd]; + /* ldaxr w0, [xt] */ + emitter.Emit("\tldaxr\t"); + retVal->Emit(emitter, nullptr); + emitter.Emit(", ["); + tempOpnd0->Emit(emitter, nullptr); + emitter.Emit("]\n"); + Operand *newValueOpnd = opnds[kInsnSixthOpnd]; + /* stlxr ws, w3, [xt] */ + emitter.Emit("\tstlxr\t"); + tempOpnd1->Emit(emitter, nullptr); + emitter.Emit(", "); + newValueOpnd->Emit(emitter, nullptr); + emitter.Emit(", ["); + tempOpnd0->Emit(emitter, nullptr); + emitter.Emit("]\n"); + /* cbnz w2, label */ + emitter.Emit("\tcbnz\t"); + tempOpnd1->Emit(emitter, nullptr); + emitter.Emit(", "); + labelOpnd->Emit(emitter, nullptr); + emitter.Emit("\n"); +} + +void AArch64Insn::EmitCounter(const CG &cg, Emitter &emitter) const { + /* + * adrp x1, __profile_bb_table$$GetBoolean_dex+4 + * ldr w17, [x1, #:lo12:__profile_bb_table$$GetBoolean_dex+4] + * add w17, w17, #1 + * str w17, [x1, #:lo12:__profile_bb_table$$GetBoolean_dex+4] + */ + const AArch64MD *md = &AArch64CG::kMd[MOP_counter]; + + Operand *opnd0 = opnds[kInsnFirstOpnd]; + Operand *opnd1 = opnds[kInsnSecondOpnd]; + OpndProp *prop0 = md->operand[kInsnFirstOpnd]; + StImmOperand *stImmOpnd = static_cast(opnd1); + CHECK_FATAL(stImmOpnd != nullptr, "stImmOpnd is null in AArch64Insn::EmitCounter"); + /* emit nop for breakpoint */ + if (cg.GetCGOptions().WithDwarf()) { + emitter.Emit("\t").Emit("nop").Emit("\n"); + } + + /* emit adrp */ + emitter.Emit("\t").Emit("adrp").Emit("\t"); + opnd0->Emit(emitter, prop0); + 
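  /*
   * Editor's note: this adrp materialises the 4 KiB page address of the
   * __profile_bb_table entry; the ldr/str emitted below use the matching #:lo12:
   * relocation to supply the low 12 bits of that address. The counter bump itself
   * (ldr w17 / add w17, #1 / str w17) is a plain, non-atomic read-modify-write.
   */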
emitter.Emit(","); + emitter.Emit(stImmOpnd->GetName()); + emitter.Emit("+").Emit(stImmOpnd->GetOffset()); + emitter.Emit("\n"); + /* emit ldr */ + emitter.Emit("\t").Emit("ldr").Emit("\tw17, ["); + opnd0->Emit(emitter, prop0); + emitter.Emit(","); + emitter.Emit("#"); + emitter.Emit(":lo12:").Emit(stImmOpnd->GetName()); + emitter.Emit("+").Emit(stImmOpnd->GetOffset()); + emitter.Emit("]"); + emitter.Emit("\n"); + /* emit add */ + emitter.Emit("\t").Emit("add").Emit("\tw17, w17, #1"); + emitter.Emit("\n"); + /* emit str */ + emitter.Emit("\t").Emit("str").Emit("\tw17, ["); + opnd0->Emit(emitter, prop0); + emitter.Emit(","); + emitter.Emit("#"); + emitter.Emit(":lo12:").Emit(stImmOpnd->GetName()); + emitter.Emit("+").Emit(stImmOpnd->GetOffset()); + emitter.Emit("]"); + emitter.Emit("\n"); +} + +void AArch64Insn::EmitClinit(const CG &cg, Emitter &emitter) const { + /* + * adrp x3, __muid_data_undef_tab$$GetBoolean_dex+144 + * ldr x3, [x3, #:lo12:__muid_data_undef_tab$$GetBoolean_dex+144] + * or, + * adrp x3, _PTR__cinf_Ljava_2Futil_2Fconcurrent_2Fatomic_2FAtomicInteger_3B + * ldr x3, [x3, #:lo12:_PTR__cinf_Ljava_2Futil_2Fconcurrent_2Fatomic_2FAtomicInteger_3B] + * + * ldr x3, [x3,#112] + * ldr wzr, [x3] + */ + const AArch64MD *md = &AArch64CG::kMd[MOP_clinit]; + + Operand *opnd0 = opnds[0]; + Operand *opnd1 = opnds[1]; + OpndProp *prop0 = md->operand[0]; + auto *stImmOpnd = static_cast(opnd1); + CHECK_FATAL(stImmOpnd != nullptr, "stImmOpnd is null in AArch64Insn::EmitClinit"); + /* emit nop for breakpoint */ + if (cg.GetCGOptions().WithDwarf()) { + emitter.Emit("\t").Emit("nop").Emit("\n"); + } + + if (stImmOpnd->GetSymbol()->IsMuidDataUndefTab()) { + /* emit adrp */ + emitter.Emit("\t").Emit("adrp").Emit("\t"); + opnd0->Emit(emitter, prop0); + emitter.Emit(","); + emitter.Emit(stImmOpnd->GetName()); + emitter.Emit("+").Emit(stImmOpnd->GetOffset()); + emitter.Emit("\n"); + /* emit ldr */ + emitter.Emit("\t").Emit("ldr").Emit("\t"); + opnd0->Emit(emitter, prop0); + emitter.Emit(","); + emitter.Emit("["); + opnd0->Emit(emitter, prop0); + emitter.Emit(","); + emitter.Emit("#"); + emitter.Emit(":lo12:").Emit(stImmOpnd->GetName()); + emitter.Emit("+").Emit(stImmOpnd->GetOffset()); + emitter.Emit("]"); + emitter.Emit("\n"); + } else { + /* adrp x3, _PTR__cinf_Ljava_2Futil_2Fconcurrent_2Fatomic_2FAtomicInteger_3B */ + emitter.Emit("\tadrp\t"); + opnd0->Emit(emitter, prop0); + emitter.Emit(","); + (void)emitter.Emit(namemangler::kPtrPrefixStr + stImmOpnd->GetName()); + emitter.Emit("\n"); + + /* ldr x3, [x3, #:lo12:_PTR__cinf_Ljava_2Futil_2Fconcurrent_2Fatomic_2FAtomicInteger_3B] */ + emitter.Emit("\tldr\t"); + opnd0->Emit(emitter, prop0); + emitter.Emit(", ["); + opnd0->Emit(emitter, prop0); + emitter.Emit(", #:lo12:"); + (void)emitter.Emit(namemangler::kPtrPrefixStr + stImmOpnd->GetName()); + emitter.Emit("]\n"); + } + /* emit "ldr x0,[x0,#48]" */ + emitter.Emit("\t").Emit("ldr").Emit("\t"); + opnd0->Emit(emitter, prop0); + emitter.Emit(","); + emitter.Emit("["); + opnd0->Emit(emitter, prop0); + emitter.Emit(",#"); + emitter.Emit(static_cast(ClassMetadata::OffsetOfInitState())); + emitter.Emit("]"); + emitter.Emit("\n"); + + /* emit "ldr xzr, [x0]" */ + emitter.Emit("\t").Emit("ldr\txzr, ["); + opnd0->Emit(emitter, prop0); + emitter.Emit("]\n"); +} + +void AArch64Insn::EmitAdrpLdr(const CG &cg, Emitter &emitter) const { + /* + * adrp xd, _PTR__cinf_Ljava_2Futil_2Fconcurrent_2Fatomic_2FAtomicInteger_3B + * ldr xd, [xd, #:lo12:_PTR__cinf_Ljava_2Futil_2Fconcurrent_2Fatomic_2FAtomicInteger_3B] + */ + 
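  /*
   * Editor's sketch (illustrative only, not from the original change): the adrp/ldr pair
   * splits the address of the _PTR__cinf_* cell into a 4 KiB page base plus a 12-bit
   * page offset, roughly:
   *
   *   uintptr_t page = symAddr & ~static_cast<uintptr_t>(0xFFF);   // adrp xd, sym
   *   uintptr_t cell = page + (symAddr & 0xFFF);                   // ... #:lo12:sym
   *   void *clsPtr   = *reinterpret_cast<void **>(cell);           // ldr xd, [xd, #:lo12:sym]
   *
   * symAddr is hypothetical; both relocations are resolved by the linker, and the value
   * loaded is whatever pointer is stored in that cell (here, a class-information pointer).
   */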
const AArch64MD *md = &AArch64CG::kMd[MOP_adrp_ldr]; + + Operand *opnd0 = opnds[0]; + Operand *opnd1 = opnds[1]; + OpndProp *prop0 = md->operand[0]; + auto *stImmOpnd = static_cast(opnd1); + CHECK_FATAL(stImmOpnd != nullptr, "stImmOpnd is null in AArch64Insn::EmitAdrpLdr"); + /* emit nop for breakpoint */ + if (cg.GetCGOptions().WithDwarf()) { + emitter.Emit("\t").Emit("nop").Emit("\n"); + } + + /* adrp xd, _PTR__cinf_Ljava_2Futil_2Fconcurrent_2Fatomic_2FAtomicInteger_3B */ + emitter.Emit("\t").Emit("adrp").Emit("\t"); + opnd0->Emit(emitter, prop0); + emitter.Emit(", "); + emitter.Emit(stImmOpnd->GetName()); + if (stImmOpnd->GetOffset() != 0) { + emitter.Emit("+").Emit(stImmOpnd->GetOffset()); + } + emitter.Emit("\n"); + + /* ldr xd, [xd, #:lo12:_PTR__cinf_Ljava_2Futil_2Fconcurrent_2Fatomic_2FAtomicInteger_3B] */ + emitter.Emit("\tldr\t"); + static_cast(opnd0)->SetRefField(true); + opnd0->Emit(emitter, prop0); + static_cast(opnd0)->SetRefField(false); + emitter.Emit(", "); + emitter.Emit("["); + opnd0->Emit(emitter, prop0); + emitter.Emit(","); + emitter.Emit("#"); + emitter.Emit(":lo12:").Emit(stImmOpnd->GetName()); + if (stImmOpnd->GetOffset() != 0) { + emitter.Emit("+").Emit(stImmOpnd->GetOffset()); + } + emitter.Emit("]\n"); +} + +void AArch64Insn::EmitAdrpLabel(Emitter &emitter) const { + /* adrp xd, label + * add xd, xd, #lo12:label + */ + const AArch64MD *md = &AArch64CG::kMd[MOP_adrp_label]; + + Operand *opnd0 = opnds[0]; + Operand *opnd1 = opnds[1]; + OpndProp *prop0 = static_cast(md->operand[0]); + LabelIdx lidx = static_cast(opnd1)->GetValue(); + + /* adrp xd, label */ + emitter.Emit("\t").Emit("adrp").Emit("\t"); + opnd0->Emit(emitter, prop0); + emitter.Emit(", "); + const char *idx; + idx = strdup(std::to_string(Globals::GetInstance()->GetBECommon()->GetMIRModule().CurFunction()->GetPuidx()).c_str()); + emitter.Emit(".label.").Emit(idx).Emit("__").Emit(lidx).Emit("\n"); + + /* add xd, xd, #lo12:label */ + emitter.Emit("\tadd\t"); + opnd0->Emit(emitter, prop0); + emitter.Emit(", "); + opnd0->Emit(emitter, prop0); + emitter.Emit(", "); + emitter.Emit(":lo12:").Emit(".label.").Emit(idx).Emit("__").Emit(lidx).Emit("\n"); + emitter.Emit("\n"); +} + +void AArch64Insn::EmitLazyBindingRoutine(Emitter &emitter) const { + /* ldr xzr, [xs] */ + const AArch64MD *md = &AArch64CG::kMd[MOP_adrp_ldr]; + + Operand *opnd0 = opnds[0]; + OpndProp *prop0 = md->operand[0]; + + /* emit "ldr xzr,[xs]" */ +#ifdef USE_32BIT_REF + emitter.Emit("\t").Emit("ldr").Emit("\twzr, ["); +#else + emitter.Emit("\t").Emit("ldr").Emit("\txzr, ["); +#endif /* USE_32BIT_REF */ + opnd0->Emit(emitter, prop0); + emitter.Emit("]"); + emitter.Emit("\t// Lazy binding\n"); +} + +void AArch64Insn::EmitClinitTail(Emitter &emitter) const { + /* + * ldr x17, [xs, #112] + * ldr wzr, [x17] + */ + const AArch64MD *md = &AArch64CG::kMd[MOP_clinit_tail]; + + Operand *opnd0 = opnds[0]; + OpndProp *prop0 = md->operand[0]; + + /* emit "ldr x17,[xs,#112]" */ + emitter.Emit("\t").Emit("ldr").Emit("\tx17, ["); + opnd0->Emit(emitter, prop0); + emitter.Emit(", #"); + emitter.Emit(static_cast(ClassMetadata::OffsetOfInitState())); + emitter.Emit("]"); + emitter.Emit("\n"); + + /* emit "ldr xzr, [x17]" */ + emitter.Emit("\t").Emit("ldr\txzr, [x17]\n"); +} + +void AArch64Insn::EmitLazyLoad(Emitter &emitter) const { + /* + * ldr wd, [xs] # xd and xs should be differenct register + * ldr wd, [xd] + */ + const AArch64MD *md = &AArch64CG::kMd[MOP_lazy_ldr]; + + Operand *opnd0 = opnds[0]; + Operand *opnd1 = opnds[1]; + OpndProp *prop0 = 
md->operand[0]; + OpndProp *prop1 = md->operand[1]; + + /* emit "ldr wd, [xs]" */ + emitter.Emit("\t").Emit("ldr\t"); +#ifdef USE_32BIT_REF + opnd0->Emit(emitter, prop0); +#else + opnd0->Emit(emitter, prop1); +#endif + emitter.Emit(", ["); + opnd1->Emit(emitter, prop1); + emitter.Emit("]\t// lazy load.\n"); + + /* emit "ldr wd, [xd]" */ + emitter.Emit("\t").Emit("ldr\t"); + opnd0->Emit(emitter, prop0); + emitter.Emit(", ["); + opnd0->Emit(emitter, prop1); + emitter.Emit("]\t// lazy load.\n"); +} + +void AArch64Insn::EmitLazyLoadStatic(Emitter &emitter) const { + /* adrp xd, :got:__staticDecoupleValueOffset$$xxx+offset + * ldr wd, [xd, #:got_lo12:__staticDecoupleValueOffset$$xxx+offset] + * ldr wzr, [xd] + */ + const AArch64MD *md = &AArch64CG::kMd[MOP_lazy_ldr_static]; + + Operand *opnd0 = opnds[0]; + Operand *opnd1 = opnds[1]; + AArch64OpndProp *prop0 = md->GetOperand(0); + auto *stImmOpnd = static_cast(opnd1); + CHECK_FATAL(stImmOpnd != nullptr, "stImmOpnd is null in AArch64Insn::EmitLazyLoadStatic"); + + /* emit "adrp xd, :got:__staticDecoupleValueOffset$$xxx+offset" */ + emitter.Emit("\t").Emit("adrp").Emit("\t"); + opnd0->Emit(emitter, prop0); + emitter.Emit(", "); + emitter.Emit(stImmOpnd->GetName()); + if (stImmOpnd->GetOffset() != 0) { + emitter.Emit("+").Emit(stImmOpnd->GetOffset()); + } + emitter.Emit("\t// lazy load static.\n"); + + /* emit "ldr wd, [xd, #:got_lo12:__staticDecoupleValueOffset$$xxx+offset]" */ + emitter.Emit("\tldr\t"); + static_cast(opnd0)->SetRefField(true); +#ifdef USE_32BIT_REF + AArch64OpndProp prop2(prop0->GetOperandType(), prop0->GetRegProp(), prop0->GetSize() / 2); + opnd0->Emit(emitter, &prop2); /* ldr wd, ... for emui */ +#else + opnd0->Emit(emitter, prop0); /* ldr xd, ... for qemu */ +#endif /* USE_32BIT_REF */ + static_cast(opnd0)->SetRefField(false); + emitter.Emit(", "); + emitter.Emit("["); + opnd0->Emit(emitter, prop0); + emitter.Emit(","); + emitter.Emit("#"); + emitter.Emit(":lo12:").Emit(stImmOpnd->GetName()); + if (stImmOpnd->GetOffset() != 0) { + emitter.Emit("+").Emit(stImmOpnd->GetOffset()); + } + emitter.Emit("]\t// lazy load static.\n"); + + /* emit "ldr wzr, [xd]" */ + emitter.Emit("\t").Emit("ldr\twzr, ["); + opnd0->Emit(emitter, prop0); + emitter.Emit("]\t// lazy load static.\n"); +} + +void AArch64Insn::EmitArrayClassCacheLoad(Emitter &emitter) const { + /* adrp xd, :got:__arrayClassCacheTable$$xxx+offset + * ldr wd, [xd, #:got_lo12:__arrayClassCacheTable$$xxx+offset] + * ldr wzr, [xd] + */ + const AArch64MD *md = &AArch64CG::kMd[MOP_arrayclass_cache_ldr]; + uint32 opndIndex = 0; + uint32 propIndex = 0; + Operand *opnd0 = opnds[opndIndex++]; + Operand *opnd1 = opnds[opndIndex++]; + AArch64OpndProp *prop0 = md->GetOperand(propIndex++); + auto *stImmOpnd = static_cast(opnd1); + CHECK_FATAL(stImmOpnd != nullptr, "stImmOpnd is null in AArch64Insn::EmitLazyLoadStatic"); + + /* emit "adrp xd, :got:__arrayClassCacheTable$$xxx+offset" */ + emitter.Emit("\t").Emit("adrp").Emit("\t"); + opnd0->Emit(emitter, prop0); + emitter.Emit(", "); + emitter.Emit(stImmOpnd->GetName()); + if (stImmOpnd->GetOffset() != 0) { + emitter.Emit("+").Emit(stImmOpnd->GetOffset()); + } + emitter.Emit("\t// load array class.\n"); + + /* emit "ldr wd, [xd, #:got_lo12:__arrayClassCacheTable$$xxx+offset]" */ + emitter.Emit("\tldr\t"); + static_cast(opnd0)->SetRefField(true); +#ifdef USE_32BIT_REF + AArch64OpndProp prop2(prop0->GetOperandType(), prop0->GetRegProp(), prop0->GetSize() / 2); + opnd0->Emit(emitter, &prop2); /* ldr wd, ... 
for emui */ +#else + opnd0->Emit(emitter, prop0); /* ldr xd, ... for qemu */ +#endif /* USE_32BIT_REF */ + static_cast(opnd0)->SetRefField(false); + emitter.Emit(", "); + emitter.Emit("["); + opnd0->Emit(emitter, prop0); + emitter.Emit(","); + emitter.Emit("#"); + emitter.Emit(":lo12:").Emit(stImmOpnd->GetName()); + if (stImmOpnd->GetOffset() != 0) { + emitter.Emit("+").Emit(stImmOpnd->GetOffset()); + } + emitter.Emit("]\t// load array class.\n"); + + /* emit "ldr wzr, [xd]" */ + emitter.Emit("\t").Emit("ldr\twzr, ["); + opnd0->Emit(emitter, prop0); + emitter.Emit("]\t// check resolve array class.\n"); +} + +void AArch64Insn::EmitCheckThrowPendingException(const CG& cg, Emitter &emitter) const { + /* + * mrs x16, TPIDR_EL0 + * ldr x16, [x16, #64] + * ldr x16, [x16, #8] + * cbz x16, .lnoexception + * bl MCC_ThrowPendingException + * .lnoexception: + */ + emitter.Emit("\t").Emit("mrs").Emit("\tx16, TPIDR_EL0"); + emitter.Emit("\n"); + emitter.Emit("\t").Emit("ldr").Emit("\tx16, [x16, #64]"); + emitter.Emit("\n"); + emitter.Emit("\t").Emit("ldr").Emit("\tx16, [x16, #8]"); + emitter.Emit("\n"); + emitter.Emit("\t").Emit("cbz").Emit("\tx16, .lnoeh.").Emit(cg.GetCurCGFunc()->GetName()); + emitter.Emit("\n"); + emitter.Emit("\t").Emit("bl").Emit("\tMCC_ThrowPendingException"); + emitter.Emit("\n"); + emitter.Emit(".lnoeh.").Emit(cg.GetCurCGFunc()->GetName()).Emit(":"); + emitter.Emit("\n"); +} + +void AArch64Insn::Emit(const CG &cg, Emitter &emitter) const { + emitter.SetCurrentMOP(mOp); + const AArch64MD *md = &AArch64CG::kMd[mOp]; + + if (!cg.GenerateVerboseAsm() && !cg.GenerateVerboseCG() && mOp == MOP_comment) { + return; + } + + switch (mOp) { + case MOP_clinit: { + EmitClinit(cg, emitter); + emitter.IncreaseJavaInsnCount(kClinitInsnCount); + return; + } + case MOP_adrp_ldr: { + emitter.IncreaseJavaInsnCount(kAdrpLdrInsnCount); + EmitAdrpLdr(cg, emitter); + if (CGOptions::IsLazyBinding() && !cg.IsLibcore()) { + EmitLazyBindingRoutine(emitter); + emitter.IncreaseJavaInsnCount(kLazyBindingRoutineInsnCount + kAdrpLdrInsnCount); + } + return; + } + case MOP_counter: { + EmitCounter(cg, emitter); + return; + } + case MOP_clinit_tail: { + EmitClinitTail(emitter); + emitter.IncreaseJavaInsnCount(kClinitTailInsnCount); + return; + } + case MOP_lazy_ldr: { + EmitLazyLoad(emitter); + emitter.IncreaseJavaInsnCount(kLazyLdrInsnCount); + return; + } + case MOP_adrp_label: { + EmitAdrpLabel(emitter); + return; + } + case MOP_lazy_tail: { + /* No need to emit this pseudo instruction. 
*/ + return; + } + case MOP_lazy_ldr_static: { + EmitLazyLoadStatic(emitter); + emitter.IncreaseJavaInsnCount(kLazyLdrStaticInsnCount); + return; + } + case MOP_arrayclass_cache_ldr: { + EmitArrayClassCacheLoad(emitter); + emitter.IncreaseJavaInsnCount(kArrayClassCacheLoadCount); + return; + } + case MOP_get_and_addI: + case MOP_get_and_addL: { + EmitGetAndAddInt(emitter); + return; + } + case MOP_get_and_setI: + case MOP_get_and_setL: { + EmitGetAndSetInt(emitter); + return; + } + case MOP_compare_and_swapI: + case MOP_compare_and_swapL: { + EmitCompareAndSwapInt(emitter); + return; + } + case MOP_string_indexof: { + EmitStringIndexOf(emitter); + return; + } + default: + break; + } + + if (CGOptions::IsNativeOpt() && mOp == MOP_xbl) { + auto *nameOpnd = static_cast(opnds[0]); + if (nameOpnd->GetName() == "MCC_CheckThrowPendingException") { + EmitCheckThrowPendingException(cg, emitter); + emitter.IncreaseJavaInsnCount(kCheckThrowPendingExceptionInsnCount); + return; + } + } + + std::string format(md->format); + emitter.Emit("\t").Emit(md->name).Emit("\t"); + size_t opndSize = GetOperandSize(); + std::vector seq(opndSize, -1); + std::vector prefix(opndSize); /* used for print prefix like "*" in icall *rax */ + int32 index = 0; + int32 commaNum = 0; + for (uint32 i = 0; i < format.length(); ++i) { + char c = format[i]; + if ('0' <= c && c <= '5') { + seq[index++] = c - '0'; + ++commaNum; + } else if (c != ',') { + prefix[index].push_back(c); + } + } + + bool isRefField = (opndSize == 0) ? false : CheckRefField(static_cast(static_cast(seq[0])), true); + if (mOp != MOP_comment) { + emitter.IncreaseJavaInsnCount(); + } + for (int32 i = 0; i < commaNum; ++i) { + if (seq[i] == -1) { + continue; + } + if (prefix[i].length() > 0) { + emitter.Emit(prefix[i]); + } + if (emitter.NeedToDealWithHugeSo() && (mOp == MOP_xbl || mOp == MOP_tail_call_opt_xbl)) { + auto *nameOpnd = static_cast(opnds[0]); + /* Suport huge so here + * As the PLT section is just before java_text section, when java_text section is larger + * then 128M, instrunction of "b" and "bl" would fault to branch to PLT stub functions. Here, to save + * instuctions space, we change the branch target to a local target within 120M address, and add non-plt + * call to the target function. + */ + emitter.InsertHugeSoTarget(nameOpnd->GetName()); + emitter.Emit(nameOpnd->GetName() + emitter.HugeSoPostFix()); + break; + } + opnds[seq[i]]->Emit(emitter, md->operand[seq[i]]); + /* reset opnd0 ref-field flag, so following instruction has correct register */ + if (isRefField && (i == 0)) { + static_cast(opnds[seq[0]])->SetRefField(false); + } + /* Temporary comment the label:.Label.debug.callee */ + if (i != (commaNum - 1)) { + emitter.Emit(", "); + } + const int commaNumForEmitLazy = 2; + if (!CGOptions::IsLazyBinding() || cg.IsLibcore() || (mOp != MOP_wldr && mOp != MOP_xldr) || + commaNum != commaNumForEmitLazy || i != 1 || !opnds[seq[1]]->IsMemoryAccessOperand()) { + continue; + } + /* + * Only check the last operand of ldr in lo12 mode. 
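 * (Editor's note: when such a load reads one of the MUID def/undef tables, the code below
 * appends an extra "ldr xzr, [xs]" via EmitLazyBindingRoutine, dereferencing the value
 * that was just loaded so the lazy-binding runtime can resolve the entry on first use.)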
+ * Check the second operand, if it's [AArch64MemOperand::kAddrModeLo12Li] + */ + auto *memOpnd = static_cast(opnds[seq[1]]); + if (memOpnd == nullptr || memOpnd->GetAddrMode() != AArch64MemOperand::kAddrModeLo12Li) { + continue; + } + const MIRSymbol *sym = memOpnd->GetSymbol(); + if (sym->IsMuidFuncDefTab() || sym->IsMuidFuncUndefTab() || + sym->IsMuidDataDefTab() || sym->IsMuidDataUndefTab()) { + emitter.Emit("\n"); + EmitLazyBindingRoutine(emitter); + emitter.IncreaseJavaInsnCount(kLazyBindingRoutineInsnCount); + } + } + if (cg.GenerateVerboseCG() || (cg.GenerateVerboseAsm() && mOp == MOP_comment)) { + const char *comment = GetComment().c_str(); + if (comment != nullptr && strlen(comment) > 0) { + (void)emitter.Emit("\t\t// ").Emit(comment); + } + } + + emitter.Emit("\n"); +} + +/* set opnd0 ref-field flag, so we can emit the right register */ +bool AArch64Insn::CheckRefField(size_t opndIndex, bool isEmit) const { + if (IsAccessRefField() && AccessMem()) { + Operand *opnd0 = opnds[opndIndex]; + if (opnd0->IsRegister()) { + if (isEmit) { + static_cast(opnd0)->SetRefField(true); + } + return true; + } + } + return false; +} + +Operand *AArch64Insn::GetResult(uint32 id) const { + ASSERT(id < GetResultNum(), "index out of range"); + const AArch64MD *md = &AArch64CG::kMd[mOp]; + uint32 tempIdx = 0; + Operand* resOpnd = nullptr; + for (uint32 i = 0; i < opnds.size(); ++i) { + if (md->GetOperand(i)->IsDef()) { + if (tempIdx == id) { + resOpnd = opnds[i]; + break; + } else { + ++tempIdx; + } + } + } + return resOpnd; +} + +void AArch64Insn::SetOpnd(uint32 id, Operand &opnd) { + ASSERT(id < GetOpndNum(), "index out of range"); + const AArch64MD *md = &AArch64CG::kMd[mOp]; + uint32 tempIdx = 0; + for (uint32 i = 0; i < opnds.size(); ++i) { + if (md->GetOperand(i)->IsUse()) { + if (tempIdx == id) { + opnds[i] = &opnd; + return; + } else { + ++tempIdx; + } + } + } +} + +void AArch64Insn::SetResult(uint32 id, Operand &opnd) { + ASSERT(id < GetResultNum(), "index out of range"); + const AArch64MD *md = &AArch64CG::kMd[mOp]; + uint32 tempIdx = 0; + for (uint32 i = 0; i < opnds.size(); ++i) { + if (md->GetOperand(i)->IsDef()) { + if (tempIdx == id) { + opnds[i] = &opnd; + return; + } else { + ++tempIdx; + } + } + } +} + +Operand *AArch64Insn::GetOpnd(uint32 id) const { + ASSERT(id < GetOpndNum(), "index out of range"); + const AArch64MD *md = &AArch64CG::kMd[mOp]; + Operand *resOpnd = nullptr; + uint32 tempIdx = 0; + for (uint32 i = 0; i < opnds.size(); ++i) { + if (md->GetOperand(i)->IsUse()) { + if (tempIdx == id) { + resOpnd = opnds[i]; + break; + } else { + ++tempIdx; + } + } + } + return resOpnd; +} +/* Return the first memory access operand. 
*/ +Operand *AArch64Insn::GetMemOpnd() const { + for (size_t i = 0; i < opnds.size(); ++i) { + Operand &opnd = GetOperand(i); + if (opnd.IsMemoryAccessOperand()) { + return &opnd; + } + } + return nullptr; +} + +bool AArch64Insn::IsVolatile() const { + return AArch64CG::kMd[mOp].IsVolatile(); +} + +bool AArch64Insn::IsMemAccessBar() const { + return AArch64CG::kMd[mOp].IsMemAccessBar(); +} + +bool AArch64Insn::IsBranch() const { + return AArch64CG::kMd[mOp].IsBranch(); +} + +bool AArch64Insn::IsCondBranch() const { + return AArch64CG::kMd[mOp].IsCondBranch(); +} + +bool AArch64Insn::IsUnCondBranch() const { + return AArch64CG::kMd[mOp].IsUnCondBranch(); +} + +bool AArch64Insn::IsCall() const { + return AArch64CG::kMd[mOp].IsCall(); +} + +bool AArch64Insn::HasLoop() const { + return AArch64CG::kMd[mOp].HasLoop(); +} + +bool AArch64Insn::IsSpecialIntrinsic() const { + switch (mOp) { + case MOP_get_and_addI: + case MOP_get_and_addL: + case MOP_compare_and_swapI: + case MOP_compare_and_swapL: + case MOP_string_indexof: + case MOP_lazy_ldr: + case MOP_get_and_setI: + case MOP_get_and_setL: { + return true; + } + default: { + return false; + } + } +} + +bool AArch64Insn::IsTailCall() const { + return (mOp == MOP_tail_call_opt_xbl || mOp == MOP_tail_call_opt_xblr); +} + +bool AArch64Insn::IsClinit() const { + return (mOp == MOP_clinit || mOp == MOP_clinit_tail || mOp == MOP_adrp_ldr); +} + +bool AArch64Insn::IsLazyLoad() const { + return (mOp == MOP_lazy_ldr) || (mOp == MOP_lazy_ldr_static) || (mOp == MOP_lazy_tail); +} + +bool AArch64Insn::IsAdrpLdr() const { + return mOp == MOP_adrp_ldr; +} + +bool AArch64Insn::IsArrayClassCache() const { + return mOp == MOP_arrayclass_cache_ldr; +} + +bool AArch64Insn::CanThrow() const { + return AArch64CG::kMd[mOp].CanThrow(); +} + +bool AArch64Insn::IsMemAccess() const { + return AArch64CG::kMd[mOp].IsMemAccess(); +} + +bool AArch64Insn::MayThrow() { + const AArch64MD *md = &AArch64CG::kMd[mOp]; + if (md->IsMemAccess() && !IsLoadLabel()) { + auto *aarchMemOpnd = static_cast(GetMemOpnd()); + ASSERT(aarchMemOpnd != nullptr, "CG invalid memory operand."); + RegOperand *baseRegister = aarchMemOpnd->GetBaseRegister(); + if (baseRegister != nullptr && + (baseRegister->GetRegisterNumber() == RFP || baseRegister->GetRegisterNumber() == RSP)) { + return false; + } + } + return md->CanThrow(); +} + +bool AArch64Insn::IsCallToFunctionThatNeverReturns() { + if (IsIndirectCall()) { + return false; + } + auto *target = static_cast(GetCallTargetOperand()); + CHECK_FATAL(target != nullptr, "target is null in AArch64Insn::IsCallToFunctionThatNeverReturns"); + const MIRSymbol *funcSt = target->GetFunctionSymbol(); + ASSERT(funcSt->GetSKind() == kStFunc, "funcst must be a function name symbol"); + MIRFunction *func = funcSt->GetFunction(); + return func->NeverReturns(); +} + +bool AArch64Insn::IsDMBInsn() const { + return AArch64CG::kMd[mOp].IsDMB(); +} + +bool AArch64Insn::IsMove() const { + return AArch64CG::kMd[mOp].IsMove(); +} + +bool AArch64Insn::IsLoad() const { + return AArch64CG::kMd[mOp].IsLoad(); +} + +bool AArch64Insn::IsLoadLabel() const { + return (mOp == MOP_wldli || mOp == MOP_xldli || mOp == MOP_sldli || mOp == MOP_dldli); +} + +bool AArch64Insn::IsStore() const { + return AArch64CG::kMd[mOp].IsStore(); +} + +bool AArch64Insn::IsLoadPair() const { + return AArch64CG::kMd[mOp].IsLoadPair(); +} + +bool AArch64Insn::IsStorePair() const { + return AArch64CG::kMd[mOp].IsStorePair(); +} + +bool AArch64Insn::IsLoadStorePair() const { + return 
AArch64CG::kMd[mOp].IsLoadStorePair(); +} + +bool AArch64Insn::IsLoadAddress() const { + return AArch64CG::kMd[mOp].IsLoadAddress(); +} + +bool AArch64Insn::IsAtomic() const { + return AArch64CG::kMd[mOp].IsAtomic(); +} + +bool AArch64Insn::IsPartDef() const { + return AArch64CG::kMd[mOp].IsPartDef(); +} + +bool AArch64Insn::OpndIsDef(uint32 id) const { + return AArch64CG::kMd[mOp].GetOperand(id)->IsDef(); +} + +bool AArch64Insn::OpndIsUse(uint32 id) const { + return AArch64CG::kMd[mOp].GetOperand(id)->IsUse(); +} + +uint32 AArch64Insn::GetLatencyType() const { + return AArch64CG::kMd[mOp].GetLatencyType(); +} + +uint32 AArch64Insn::GetAtomicNum() const { + return AArch64CG::kMd[mOp].GetAtomicNum(); +} + +bool AArch64Insn::IsYieldPoint() const { + /* + * It is a yieldpoint if loading from a dedicated + * register holding polling page address: + * ldr wzr, [RYP] + */ + if (IsLoad() && !IsLoadLabel()) { + auto mem = static_cast(GetOpnd(0)); + return (mem != nullptr && mem->GetBaseRegister() != nullptr && mem->GetBaseRegister()->GetRegisterNumber() == RYP); + } + return false; +} +/* Return the copy operand id of reg1 if it is an insn who just do copy from reg1 to reg2. + * i. mov reg2, reg1 + * ii. add/sub reg2, reg1, 0 + * iii. mul reg2, reg1, 1 + */ +int32 AArch64Insn::CopyOperands() const { + if (mOp >= MOP_xmovrr && mOp <= MOP_xvmovrv) { + return 1; + } + if ((mOp >= MOP_xaddrrr && mOp <= MOP_ssub) || (mOp >= MOP_xlslrri6 && mOp <= MOP_wlsrrrr)) { + Operand &opnd2 = GetOperand(kInsnThirdOpnd); + if (opnd2.IsIntImmediate()) { + auto &immOpnd = static_cast(opnd2); + if (immOpnd.IsZero()) { + return 1; + } + } + } + if (mOp > MOP_xmulrrr && mOp <= MOP_xvmuld) { + Operand &opnd2 = GetOperand(kInsnThirdOpnd); + if (opnd2.IsIntImmediate()) { + auto &immOpnd = static_cast(opnd2); + if (immOpnd.GetValue() == 1) { + return 1; + } + } + } + return -1; +} + +void AArch64Insn::CheckOpnd(Operand &opnd, OpndProp &prop) const { + (void)opnd; + (void)prop; +#if DEBUG + auto &mopd = static_cast(prop); + switch (opnd.GetKind()) { + case Operand::kOpdRegister: + ASSERT(mopd.IsRegister(), "expect reg"); + break; + case Operand::kOpdOffset: + case Operand::kOpdImmediate: + ASSERT(mopd.GetOperandType() == Operand::kOpdImmediate, "expect imm"); + break; + case Operand::kOpdFPImmediate: + ASSERT(mopd.GetOperandType() == Operand::kOpdFPImmediate, "expect fpimm"); + break; + case Operand::kOpdFPZeroImmediate: + ASSERT(mopd.GetOperandType() == Operand::kOpdFPZeroImmediate, "expect fpzero"); + break; + case Operand::kOpdMem: + ASSERT(mopd.GetOperandType() == Operand::kOpdMem, "expect mem"); + break; + case Operand::kOpdBBAddress: + ASSERT(mopd.GetOperandType() == Operand::kOpdBBAddress, "expect address"); + break; + case Operand::kOpdList: + ASSERT(mopd.GetOperandType() == Operand::kOpdList, "expect list operand"); + break; + case Operand::kOpdCond: + ASSERT(mopd.GetOperandType() == Operand::kOpdCond, "expect cond operand"); + break; + case Operand::kOpdShift: + ASSERT(mopd.GetOperandType() == Operand::kOpdShift, "expect LSL operand"); + break; + case Operand::kOpdStImmediate: + ASSERT(mopd.GetOperandType() == Operand::kOpdStImmediate, "expect symbol name (literal)"); + break; + case Operand::kOpdString: + ASSERT(mopd.GetOperandType() == Operand::kOpdString, "expect a string"); + break; + default: + ASSERT(false, "NYI"); + break; + } +#endif +} + +bool AArch64Insn::Check() const { +#if DEBUG + const AArch64MD *md = &AArch64CG::kMd[mOp]; + if (md == nullptr) { + return false; + } + for (uint32 i = 0; i < 
GetOperandSize(); ++i) { + Operand &opnd = GetOperand(i); + /* maybe if !opnd, break ? */ + CheckOpnd(opnd, *(md->operand[i])); + } + return true; +#else + return false; +#endif +} + +void AArch64Insn::Dump() const { + const AArch64MD *md = &AArch64CG::kMd[mOp]; + ASSERT(md != nullptr, "md should not be nullptr"); + + LogInfo::MapleLogger() << "< " << GetId() << " > "; + LogInfo::MapleLogger() << md->name << "(" << mOp << ")"; + + for (uint32 i = 0; i < GetOperandSize(); ++i) { + Operand &opnd = GetOperand(i); + LogInfo::MapleLogger() << " (opnd" << i << ": "; + opnd.Dump(); + LogInfo::MapleLogger() << ")"; + } + LogInfo::MapleLogger() << "\n"; +} + +bool AArch64Insn::IsDefinition() const { + /* check if we are seeing ldp or not */ + ASSERT(AArch64CG::kMd[mOp].GetOperand(1) == nullptr || + !AArch64CG::kMd[mOp].GetOperand(1)->IsRegDef(), "check if we are seeing ldp or not"); + if (AArch64CG::kMd[mOp].GetOperand(0) == nullptr) { + return false; + } + return AArch64CG::kMd[mOp].GetOperand(0)->IsRegDef(); +} + +bool AArch64Insn::IsDestRegAlsoSrcReg() const { + auto *prop0 = static_cast(AArch64CG::kMd[mOp].GetOperand(0)); + ASSERT(prop0 != nullptr, "expect a AArch64OpndProp"); + return prop0->IsRegDef() && prop0->IsRegUse(); +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_isa.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_isa.cpp new file mode 100644 index 0000000000000000000000000000000000000000..75cb8dde931da6c2c77941d46f52066dd1bf35a2 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_isa.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ + +#include "riscv64_isa.h" + +namespace maplebe { +/* + * Get the ldp/stp corresponding to ldr/str + * mop : a ldr or str machine operator + */ +MOperator GetMopPair(MOperator mop) { + switch (mop) { + case MOP_xldr: + return MOP_xldp; + case MOP_wldr: + return MOP_wldp; + case MOP_xstr: + return MOP_xstp; + case MOP_wstr: + return MOP_wstp; + case MOP_dldr: + return MOP_dldp; + case MOP_sldr: + return MOP_sldp; + case MOP_dstr: + return MOP_dstp; + case MOP_sstr: + return MOP_sstp; + default: + ASSERT(false, "should not run here"); + return MOP_undef; + } +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_live.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_live.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d23eec5aee01e1c68d17f369de009b7896edea78 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_live.cpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "riscv64_live.h" +#include "riscv64_cg.h" + +namespace maplebe { +void AArch64LiveAnalysis::InitEhDefine(BB &bb) { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + + /* Insert MOP_pseudo_eh_def_x R1. */ + RegOperand ®R1 = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(R1, k64BitSize, kRegTyInt); + Insn &pseudoInsn1 = cgFunc->GetCG()->BuildInstruction(MOP_pseudo_eh_def_x, regR1); + bb.InsertInsnBegin(pseudoInsn1); + + /* Insert MOP_pseudo_eh_def_x R0. */ + RegOperand ®R0 = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(R0, k64BitSize, kRegTyInt); + Insn &pseudoInsn2 = cgFunc->GetCG()->BuildInstruction(MOP_pseudo_eh_def_x, regR0); + bb.InsertInsnBegin(pseudoInsn2); +} + +/* build use and def sets of each BB according to the type of regOpnd. */ +void AArch64LiveAnalysis::CollectLiveInfo(BB &bb, const Operand &opnd, bool isDef, bool isUse) const { + if (!opnd.IsRegister()) { + return; + } + const RegOperand ®Opnd = static_cast(opnd); + regno_t regNO = regOpnd.GetRegisterNumber(); + RegType regType = regOpnd.GetRegisterType(); + if (regType == kRegTyVary) { + return; + } + if (isDef) { + bb.SetDefBit(regNO); + if (!isUse) { + bb.UseResetBit(regNO); + } + } + if (isUse) { + bb.SetUseBit(regNO); + bb.DefResetBit(regNO); + } +} + +/* + * entry of get def/use of bb. + * getting the def or use info of each regopnd as parameters of CollectLiveInfo(). +*/ +void AArch64LiveAnalysis::GetBBDefUse(BB &bb) { + if (bb.GetKind() == BB::kBBReturn) { + GenerateReturnBBDefUse(bb); + } + if (bb.IsEmpty()) { + return; + } + bb.DefResetAllBit(); + bb.UseResetAllBit(); + + FOR_BB_INSNS_REV(insn, &bb) { + if (!insn->IsMachineInstruction()) { + continue; + } + + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + if (insn->IsCall()) { + ProcessCallInsnParam(bb); + } + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + bool isDef = regProp->IsRegDef(); + bool isUse = regProp->IsRegUse(); + if (opnd.IsList()) { + ProcessListOpnd(bb, opnd); + } else if (opnd.IsMemoryAccessOperand()) { + ProcessMemOpnd(bb, opnd); + } else if (opnd.IsConditionCode()) { + ProcessCondOpnd(bb); + } else { + CollectLiveInfo(bb, opnd, isDef, isUse); + } + } + } +} + +bool AArch64LiveAnalysis::CleanupBBIgnoreReg(uint32 reg) { + uint32 regNO = reg + R0; + if (regNO < R8 || (R29 <= regNO && regNO <= RZR)) { + return true; + } + return false; +} + +void AArch64LiveAnalysis::GenerateReturnBBDefUse(BB &bb) const { + PrimType returnType = cgFunc->GetFunction().GetReturnType()->GetPrimType(); + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + if (IsPrimitiveFloat(returnType)) { + Operand &phyOpnd = + aarchCGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(V0), k64BitSize, kRegTyFloat); + CollectLiveInfo(bb, phyOpnd, false, true); + } else if (IsPrimitiveInteger(returnType)) { + Operand &phyOpnd = + aarchCGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(R0), k64BitSize, kRegTyInt); + CollectLiveInfo(bb, phyOpnd, false, true); + } +} + +void AArch64LiveAnalysis::ProcessCallInsnParam(BB &bb) const { + /* R0 ~ R7(R0 + 0 
~ R0 + 7) and V0 ~ V7 (V0 + 0 ~ V0 + 7) is parameter register */ + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + for (uint32 i = 0; i < 8; ++i) { + Operand &phyOpndR = + aarchCGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(R0 + i), k64BitSize, kRegTyInt); + CollectLiveInfo(bb, phyOpndR, true, false); + Operand &phyOpndV = + aarchCGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(V0 + i), k64BitSize, kRegTyFloat); + CollectLiveInfo(bb, phyOpndV, true, false); + } +} + +void AArch64LiveAnalysis::ProcessListOpnd(BB &bb, Operand &opnd) const { + ListOperand &listOpnd = static_cast(opnd); + for (auto op : listOpnd.GetOperands()) { + CollectLiveInfo(bb, *op, false, true); + } +} + +void AArch64LiveAnalysis::ProcessMemOpnd(BB &bb, Operand &opnd) const { + MemOperand &memOpnd = static_cast(opnd); + Operand *base = memOpnd.GetBaseRegister(); + Operand *offset = memOpnd.GetIndexRegister(); + if (base != nullptr) { + CollectLiveInfo(bb, *base, false, true); + } + if (offset != nullptr) { + CollectLiveInfo(bb, *offset, false, true); + } +} + +void AArch64LiveAnalysis::ProcessCondOpnd(BB &bb) const { + Operand &rflag = cgFunc->GetOrCreateRflag(); + CollectLiveInfo(bb, rflag, false, true); +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_lsra.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_lsra.cpp new file mode 100644 index 0000000000000000000000000000000000000000..35262d498121e4b7d1ff6764547636b7abd532ea --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_lsra.cpp @@ -0,0 +1,2643 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ + +#include "riscv64_lsra.h" +#include +#include +#include "riscv64_color_ra.h" +#include "riscv64_operand.h" +#include "riscv64_insn.h" +#include "riscv64_cg.h" + +namespace maplebe { +/* + * ================== + * = Linear Scan RA + * ================== + */ +#define LSRA_DUMP CG_DEBUG_FUNC(cgFunc) + +namespace { +constexpr uint32 kSpilled = 1; +constexpr uint32 kSpecialIntSpillReg = 16; +constexpr uint32 kMinLiveIntervalLength = 20; +constexpr uint32 kPrintedActiveListLength = 10; +constexpr uint32 kMinRangesSize = 2; +} + +#define IN_SPILL_RANGE \ + (cgFunc->GetName().find(CGOptions::GetDumpFunc()) != std::string::npos && ++debugSpillCnt && \ + (CGOptions::GetSpillRangesBegin() < debugSpillCnt) && (debugSpillCnt < CGOptions::GetSpillRangesEnd())) + +#undef LSRA_GRAPH + +/* + * This LSRA implementation is an interpretation of the [Poletto97] paper. + * BFS BB ordering is used to order the instructions. The live intervals are vased on + * this instruction order. All vreg defines should come before an use, else a warning is + * given. + * Live interval is traversed in order from lower instruction order to higher order. + * When encountering a live interval for the first time, it is assumed to be live and placed + * inside the 'active' structure until the vreg's last access. 
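 * A rough sketch of the allocation loop described here (editor's illustrative
 * pseudo-code, not the exact implementation):
 *
 *   sort live intervals by firstDef;
 *   for (LiveInterval *li : intervals) {
 *     expire from 'active' every interval whose lastUse < li->firstDef;  // frees its register
 *     if (a physical register is free) {
 *       assign it to li and insert li into 'active';
 *     } else {
 *       spill li, or the active interval that ends last if that one ends later;
 *     }
 *   }
 *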
During the time a vreg + * is in 'active', the vreg occupies a physical register allocation and no other vreg can + * be allocated the same physical register. + */ +void LSRALinearScanRegAllocator::PrintRegSet(const MapleSet &set, const std::string &str) const { + LogInfo::MapleLogger() << str; + for (auto reg : set) { + LogInfo::MapleLogger() << " " << reg; + } + LogInfo::MapleLogger() << "\n"; +} + +bool LSRALinearScanRegAllocator::CheckForReg(Operand &opnd, Insn &insn, LiveInterval &li, regno_t regNO, + bool isDef) const { + if (!opnd.IsRegister()) { + return false; + } + auto ®Opnd = static_cast(opnd); + if (regOpnd.GetRegisterType() == kRegTyCc || regOpnd.GetRegisterType() == kRegTyVary) { + return false; + } + if (regOpnd.GetRegisterNumber() == regNO) { + LogInfo::MapleLogger() << "set object circle at " << insn.GetId() << "," << li.GetRegNO() << + " size 5 fillcolor rgb \""; + if (isDef) { + LogInfo::MapleLogger() << "black\"\n"; + } else { + LogInfo::MapleLogger() << "orange\"\n"; + } + } + return true; +} + +/* + * This is a support routine to compute the overlapping live intervals in graph form. + * The output file can be viewed by gnuplot. + * Despite the function name of LiveRanges, it is using live intervals. + */ +void LSRALinearScanRegAllocator::PrintLiveRanges() const { + /* ================= Output to plot.pg =============== */ + std::ofstream out("plot.pg"); + CHECK_FATAL(out.is_open(), "Failed to open output file: plot.pg"); + std::streambuf *coutBuf = LogInfo::MapleLogger().rdbuf(); /* old buf */ + LogInfo::MapleLogger().rdbuf(out.rdbuf()); /* new buf */ + + LogInfo::MapleLogger() << "#!/usr/bin/gnuplot\n"; + LogInfo::MapleLogger() << "#maxInsnNum " << maxInsnNum << "\n"; + LogInfo::MapleLogger() << "#minVregNum " << minVregNum << "\n"; + LogInfo::MapleLogger() << "#maxVregNum " << maxVregNum << "\n"; + LogInfo::MapleLogger() << "reset\nset terminal png\n"; + LogInfo::MapleLogger() << "set xrange [1:" << maxInsnNum << "]\n"; + LogInfo::MapleLogger() << "set grid\nset style data linespoints\n"; + LogInfo::MapleLogger() << "set datafile missing '0'\n"; + std::vector> graph; + graph.resize(maxVregNum); + for (uint32 i = 0; i < maxVregNum; ++i) { + graph[i].resize(maxInsnNum); + } + uint32 minY = 0xFFFFFFFF; + uint32 maxY = 0; + for (auto *li : liveIntervalsArray) { + if (li == nullptr || li->GetRegNO() == 0) { + continue; + } + uint32 regNO = li->GetRegNO(); + if ((li->GetLastUse() - li->GetFirstDef()) < kMinLiveIntervalLength) { + continue; + } + if (regNO < minY) { + minY = regNO; + } + if (regNO > maxY) { + maxY = regNO; + } + uint32 n; + for (n = 0; n <= (li->GetFirstDef() - 1); ++n) { + graph[regNO - minVregNum][n] = 0; + } + if (li->GetLastUse() >= n) { + for (; n <= (li->GetLastUse() - 1); ++n) { + graph[regNO - minVregNum][n] = regNO; + } + } + for (; n < maxInsnNum; ++n) { + graph[regNO - minVregNum][n] = 0; + } + + for (auto *bb : sortedBBs) { + FOR_BB_INSNS(insn, bb) { + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 iSecond = 0; iSecond < opndNum; ++iSecond) { + Operand &opnd = insn->GetOperand(iSecond); + if (opnd.IsList()) { + } else if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + Operand *base = memOpnd.GetBaseRegister(); + Operand *offset = memOpnd.GetIndexRegister(); + if (base != nullptr && !CheckForReg(*base, *insn, *li, regNO, false)) { + continue; + } + if (offset != nullptr && !CheckForReg(*offset, *insn, *li, regNO, false)) { + continue; 
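  /*
   * Editor's note: this operand walk only feeds the gnuplot output: CheckForReg prints a
   * circle marker (black for defs, orange for uses) wherever this interval's register
   * appears, so the generated plot marks every def/use point of the live interval.
   */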
+ } + } else { + bool isDef = static_cast(md->operand[iSecond])->IsRegDef(); + (void)CheckForReg(opnd, *insn, *li, regNO, isDef); + } + } + } + } + } + LogInfo::MapleLogger() << "set yrange [" << (minY - 1) << ":" << (maxY + 1) << "]\n"; + + LogInfo::MapleLogger() << "plot \"plot.dat\" using 1:2 title \"R" << minVregNum << "\""; + for (uint32 i = 1; i < (maxVregNum - minVregNum + 1); ++i) { + LogInfo::MapleLogger() << ", \\\n\t\"\" using 1:" << (i + kDivide2) << " title \"R" << (minVregNum + i) << "\""; + } + LogInfo::MapleLogger() << ";\n"; + + /* ================= Output to plot.dat =============== */ + std::ofstream out2("plot.dat"); + CHECK_FATAL(out2.is_open(), "Failed to open output file: plot.dat"); + LogInfo::MapleLogger().rdbuf(out2.rdbuf()); /* new buf */ + LogInfo::MapleLogger() << "##reg"; + for (uint32 i = minVregNum; i <= maxVregNum; ++i) { + LogInfo::MapleLogger() << " R" << i; + } + LogInfo::MapleLogger() << "\n"; + for (uint32 n = 0; n < maxInsnNum; ++n) { + LogInfo::MapleLogger() << (n + 1); + for (uint32 i = minVregNum; i <= maxVregNum; ++i) { + LogInfo::MapleLogger() << " " << graph[i - minVregNum][n]; + } + LogInfo::MapleLogger() << "\n"; + } + LogInfo::MapleLogger().rdbuf(coutBuf); +} + +void LSRALinearScanRegAllocator::PrintLiveInterval(LiveInterval &li, const std::string &str) const { + LogInfo::MapleLogger() << str << "\n"; + if (li.GetIsCall() != nullptr) { + LogInfo::MapleLogger() << " firstDef " << li.GetFirstDef(); + LogInfo::MapleLogger() << " isCall"; + } else if (li.GetPhysUse()) { + LogInfo::MapleLogger() << "\tregNO " << li.GetRegNO(); + LogInfo::MapleLogger() << " firstDef " << li.GetFirstDef(); + LogInfo::MapleLogger() << " physUse " << li.GetPhysUse(); + LogInfo::MapleLogger() << " endByCall " << li.IsEndByCall(); + } else { + /* show regno/firstDef/lastUse with 5/8/8 width respectively */ + LogInfo::MapleLogger() << "\tregNO " << std::setw(5) << li.GetRegNO(); + LogInfo::MapleLogger() << " firstDef " << std::setw(8) << li.GetFirstDef(); + LogInfo::MapleLogger() << " lastUse " << std::setw(8) << li.GetLastUse(); + LogInfo::MapleLogger() << " assigned " << li.GetAssignedReg(); + LogInfo::MapleLogger() << " refCount " << li.GetRefCount(); + LogInfo::MapleLogger() << " priority " << li.GetPriority(); + } + LogInfo::MapleLogger() << " object_address 0x" << std::hex << &li << std::dec << "\n"; +} + +void LSRALinearScanRegAllocator::PrintParamQueue(const std::string &str) { + LogInfo::MapleLogger() << str << "\n"; + for (SingleQue &que : intParamQueue) { + if (que.empty()) { + continue; + } + LiveInterval *li = que.front(); + LiveInterval *last = que.back(); + PrintLiveInterval(*li, ""); + while (li != last) { + que.pop_front(); + que.push_back(li); + li = que.front(); + PrintLiveInterval(*li, ""); + } + que.pop_front(); + que.push_back(li); + } +} + +void LSRALinearScanRegAllocator::PrintCallQueue(const std::string &str) const { + LogInfo::MapleLogger() << str << "\n"; + for (auto *li : callList) { + PrintLiveInterval(*li, ""); + } +} + +void LSRALinearScanRegAllocator::PrintActiveList(const std::string &str, uint32 len) const { + uint32 count = 0; + LogInfo::MapleLogger() << str << " " << active.size() << "\n"; + for (auto *li : active) { + PrintLiveInterval(*li, ""); + ++count; + if ((len != 0) && (count == len)) { + break; + } + } +} + +void LSRALinearScanRegAllocator::PrintActiveListSimple() const { + for (const auto *li : active) { + uint32 assignedReg = li->GetAssignedReg(); + if (li->GetStackSlot() == kSpilled) { + assignedReg = kSpecialIntSpillReg; + 
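    /*
     * Editor's note: intervals whose stackSlot is kSpilled are reported with the dedicated
     * integer spill register (kSpecialIntSpillReg, i.e. 16) instead of a real assignment.
     */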
}
+    LogInfo::MapleLogger() << li->GetRegNO() << "(" << assignedReg << ", ";
+    if (li->GetPhysUse()) {
+      LogInfo::MapleLogger() << "p) ";
+    } else {
+      LogInfo::MapleLogger() << li->GetFirstAcrossedCall();
+    }
+    LogInfo::MapleLogger() << "<" << li->GetFirstDef() << "," << li->GetLastUse() << ">) ";
+  }
+  LogInfo::MapleLogger() << "\n";
+}
+
+void LSRALinearScanRegAllocator::PrintLiveIntervals() const {
+  for (auto *li : liveIntervalsArray) {
+    if (li == nullptr || li->GetRegNO() == 0) {
+      continue;
+    }
+    PrintLiveInterval(*li, "");
+  }
+  LogInfo::MapleLogger() << "\n";
+}
+
+void LSRALinearScanRegAllocator::DebugCheckActiveList() const {
+  LiveInterval *prev = nullptr;
+  for (auto *li : active) {
+    if (prev != nullptr) {
+      if ((li->GetRegNO() <= V7) && (prev->GetRegNO() > V7)) {
+        if (li->GetFirstDef() < prev->GetFirstDef()) {
+          LogInfo::MapleLogger() << "ERROR: active list with out of order phys + vreg\n";
+          PrintLiveInterval(*prev, "prev");
+          PrintLiveInterval(*li, "current");
+          PrintActiveList("Active", kPrintedActiveListLength);
+        }
+      }
+      if ((li->GetRegNO() <= V7) && (prev->GetRegNO() <= V7)) {
+        if (li->GetFirstDef() < prev->GetFirstDef()) {
+          LogInfo::MapleLogger() << "ERROR: active list with out of order phys reg use\n";
+          PrintLiveInterval(*prev, "prev");
+          PrintLiveInterval(*li, "current");
+          PrintActiveList("Active", kPrintedActiveListLength);
+        }
+      }
+    } else {
+      prev = li;
+    }
+  }
+}
+
+/*
+ * Prepare the free physical register pool for allocation.
+ * When a physical register is allocated, it is removed from the pool.
+ * The physical register is re-inserted into the pool when the associated live
+ * interval has ended.
+ */
+void LSRALinearScanRegAllocator::InitFreeRegPool() {
+  for (regno_t regNO = kRinvalid; regNO < kMaxRegNum; ++regNO) {
+    /* special handle for R9 due to MRT_CallSlowNativeExt */
+    if (!AArch64Abi::IsAvailableReg(static_cast<AArch64reg>(regNO)) || regNO == R9) {
+      continue;
+    }
+    if (AArch64isa::IsGPRegister(static_cast<AArch64reg>(regNO))) {
+      /* when yieldpoint is enabled, x19 is reserved.
*/ + if (IsYieldPointReg(static_cast(regNO))) { + continue; + } + /* (15), 16, 17 for spill */ + if (AArch64Abi::IsSpillRegInRA(static_cast(regNO), needExtraSpillReg)) { + intSpillRegSet.push_back(regNO - R0); + continue; + } + /* 0 to 7 for parameters */ + if (AArch64Abi::IsParamReg(static_cast(regNO))) { + (void)intParamRegSet.insert(regNO - R0); + intParamMask |= 1u << (regNO - R0); + } else if (AArch64Abi::IsCalleeSavedReg(static_cast(regNO))) { + (void)intCalleeRegSet.insert(regNO - R0); + intCalleeMask |= 1u << (regNO - R0); + } else { + (void)intCallerRegSet.insert(regNO - R0); + intCallerMask |= 1u << (regNO - R0); + } + } else { + /* 30, 31 for spill */ + if (AArch64Abi::IsSpillRegInRA(static_cast(regNO), needExtraSpillReg)) { + fpSpillRegSet.push_back(regNO - V0); + continue; + } + /* 0 to 7 for parameters */ + if (AArch64Abi::IsParamReg(static_cast(regNO))) { + (void)fpParamRegSet.insert(regNO - V0); + fpParamMask |= 1u << (regNO - V0); + } else if (AArch64Abi::IsCalleeSavedReg(static_cast(regNO))) { + (void)fpCalleeRegSet.insert(regNO - V0); + fpCalleeMask |= 1u << (regNO - V0); + } else { + (void)fpCallerRegSet.insert(regNO - V0); + fpCallerMask |= 1u << (regNO - V0); + } + } + } + + if (LSRA_DUMP) { + PrintRegSet(intCallerRegSet, "ALLOCATABLE_INT_CALLER"); + PrintRegSet(intCalleeRegSet, "ALLOCATABLE_INT_CALLEE"); + PrintRegSet(intParamRegSet, "ALLOCATABLE_INT_PARAM"); + PrintRegSet(fpCallerRegSet, "ALLOCATABLE_FP_CALLER"); + PrintRegSet(fpCalleeRegSet, "ALLOCATABLE_FP_CALLEE"); + PrintRegSet(fpParamRegSet, "ALLOCATABLE_FP_PARAM"); + LogInfo::MapleLogger() << "INT_SPILL_REGS"; + for (uint32 intSpillRegNO : intSpillRegSet) { + LogInfo::MapleLogger() << " " << intSpillRegNO; + } + LogInfo::MapleLogger() << "\n"; + LogInfo::MapleLogger() << "FP_SPILL_REGS"; + for (uint32 fpSpillRegNO : fpSpillRegSet) { + LogInfo::MapleLogger() << " " << fpSpillRegNO; + } + LogInfo::MapleLogger() << "\n"; + LogInfo::MapleLogger() << std::hex; + LogInfo::MapleLogger() << "INT_CALLER_MASK " << intCallerMask << "\n"; + LogInfo::MapleLogger() << "INT_CALLEE_MASK " << intCalleeMask << "\n"; + LogInfo::MapleLogger() << "INT_PARAM_MASK " << intParamMask << "\n"; + LogInfo::MapleLogger() << "FP_CALLER_FP_MASK " << fpCallerMask << "\n"; + LogInfo::MapleLogger() << "FP_CALLEE_FP_MASK " << fpCalleeMask << "\n"; + LogInfo::MapleLogger() << "FP_PARAM_FP_MASK " << fpParamMask << "\n"; + LogInfo::MapleLogger() << std::dec; + } +} + +/* Remember calls for caller/callee allocation. */ +void LSRALinearScanRegAllocator::RecordCall(Insn &insn) { + /* Maintain call at the beginning of active list */ + auto *li = cgFunc->GetMemoryPool()->New(alloc); + li->SetFirstDef(insn.GetId()); + li->SetIsCall(insn); + callList.push_back(li); +} + +/* Handle parameter registers for live interval. */ +void LSRALinearScanRegAllocator::RecordPhysRegs(const RegOperand ®Opnd, uint32 insnNum, bool isDef) { + RegType regType = regOpnd.GetRegisterType(); + uint32 regNO = regOpnd.GetRegisterNumber(); + if (regType == kRegTyCc || regType == kRegTyVary) { + return; + } + + if (IsUntouchableReg(regNO) || regOpnd.IsConstReg()) { + return; + } + + if (regNO == R30 || regNO == R9) { + return; + } + + bool maybeParam = (regType == kRegTyInt && intParamQueue[regNO - R0].empty()) || + (regType == kRegTyFloat && fpParamQueue[regNO - V0].empty()); + + if (isDef) { + /* parameter register def is assumed to be live until a call. 
*/ + auto *li = cgFunc->GetMemoryPool()->New(alloc); + li->SetRegNO(regNO); + li->SetRegType(regType); + li->SetStackSlot(0xFFFFFFFF); + li->SetFirstDef(insnNum); + li->SetPhysUse(insnNum); + li->SetAssignedReg(regNO); + if (regType == kRegTyInt) { + intParamQueue[regNO - R0].push_back(li); + } else { + fpParamQueue[regNO - V0].push_back(li); + } + } else if (maybeParam) { + CHECK_FATAL(false, "impossible"); + } else { + if (regType == kRegTyInt) { + if ((regNO - R0) >= intParamQueue.size()) { + CHECK_FATAL(false, "index out of range in LSRALinearScanRegAllocator::RecordPhysRegs"); + } + LiveInterval *li = intParamQueue[regNO - R0].back(); + li->SetPhysUse(insnNum); + } else { + LiveInterval *li = fpParamQueue[regNO - V0].back(); + li->SetPhysUse(insnNum); + } + } +} + +void LSRALinearScanRegAllocator::UpdateLiveIntervalState(const BB &bb, LiveInterval &li) { + if (bb.IsCatch()) { + li.SetInCatchState(); + } else { + li.SetNotInCatchState(); + } + + if (bb.GetInternalFlag1()) { + li.SetInCleanupState(); + } else { + li.SetNotInCleanupState(bb.GetId() == 1); + } +} + +/* main entry function for live interval computation. */ +void LSRALinearScanRegAllocator::SetupLiveInterval(Operand &opnd, Insn &insn, bool isDef, uint32 &nUses) { + if (!opnd.IsRegister()) { + return; + } + auto ®Opnd = static_cast(opnd); + uint32 insnNum = insn.GetId(); + if (regOpnd.IsPhysicalRegister()) { + RecordPhysRegs(regOpnd, insnNum, isDef); + return; + } + RegType regType = regOpnd.GetRegisterType(); + if (regType == kRegTyCc || regType == kRegTyVary) { + return; + } + + LiveInterval *li = nullptr; + uint32 regNO = regOpnd.GetRegisterNumber(); + if (liveIntervalsArray[regNO] == nullptr) { + li = cgFunc->GetMemoryPool()->New(alloc); + li->SetRegNO(regNO); + li->SetStackSlot(0xFFFFFFFF); + liveIntervalsArray[regNO] = li; + } else { + li = liveIntervalsArray[regNO]; + } + li->SetRegType(regType); + + BB *curBB = insn.GetBB(); + if (isDef) { + if (li->GetFirstDef() == 0) { + li->SetFirstDef(insnNum); + li->SetLastUse(insnNum + 1); + } else if (!curBB->IsUnreachable()) { + if (li->GetLastUse() < insnNum || li->IsUseBeforeDef()) { + li->SetLastUse(insnNum + 1); + } + } + /* + * try-catch related + * Not set when extending live interval with bb's livein in ComputeLiveInterval. + */ + li->SetResultCount(li->GetResultCount() + 1); + } else { + if (li->GetFirstDef() == 0) { + ASSERT(false, "SetupLiveInterval: use before def"); + } + /* + * In ComputeLiveInterval when extending live interval using + * live-out information, li created does not have a type. + */ + if (!curBB->IsUnreachable()) { + li->SetLastUse(insnNum); + } + ++nUses; + } + UpdateLiveIntervalState(*curBB, *li); + + li->SetRefCount(li->GetRefCount() + 1); + + uint32 index = regNO / (sizeof(uint64) * k8ByteSize); + uint64 bit = regNO % (sizeof(uint64) * k8ByteSize); + if ((regUsedInBB[index] & (static_cast(1) << bit)) != 0) { + li->SetMultiUseInBB(true); + } + regUsedInBB[index] |= (static_cast(1) << bit); + + if (minVregNum > regNO) { + minVregNum = regNO; + } + if (maxVregNum < regNO) { + maxVregNum = regNO; + } + + /* setup the def/use point for it */ + ASSERT(regNO < liveIntervalsArray.size(), "out of range of vector liveIntervalsArray"); +} + +/* + * Support 'hole' in LSRA. + * For a live interval, there might be multiple segments of live ranges, + * and between these segments a 'hole'. + * Some other short lived vreg can go into these 'holes'. 
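+ * Example with made-up ids: ranges (10, 20) and (40, 60) leave the hole
+ * (20, 40); a vreg whose whole interval fits inside (20, 40) can share the
+ * physical register of the longer interval (see FillInHole below).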
+ * + * from : starting instruction sequence id + * to : ending instruction sequence id + */ +void LSRALinearScanRegAllocator::LiveInterval::AddRange(uint32 from, uint32 to) { + if (ranges.empty()) { + ranges.push_back(std::pair(from, to)); + } else { + if (to < ranges.front().first) { + (void)ranges.insert(ranges.begin(), std::pair(from, to)); + } else if (to >= ranges.front().second && from < ranges.front().first) { + ranges.front().first = from; + ranges.front().second = to; + } else if (to >= ranges.front().first && from < ranges.front().first) { + ranges.front().first = from; + } else if (from > ranges.front().second) { + ASSERT(false, "No possible on reverse traverse."); + } + } +} + +void LSRALinearScanRegAllocator::LiveInterval::AddUsePos(uint32 pos) { + (void)usePositions.insert(pos); +} + +/* See if a vreg can fit in one of the holes of a longer live interval. */ +uint32 LSRALinearScanRegAllocator::FillInHole(LiveInterval &li) { + MapleSet::iterator it; + for (it = active.begin(); it != active.end(); ++it) { + auto *ili = static_cast(*it); + + /* + * If ili is part in cleanup, the hole info will be not correct, + * since cleanup bb do not have edge to normal func bb, and the + * live-out info will not correct. + */ + if (!ili->IsAllOutCleanup() || ili->IsInCatch()) { + continue; + } + + if (ili->GetRegType() != li.GetRegType() || ili->GetStackSlot() != 0xFFFFFFFF || ili->GetLiChild() != nullptr || + ili->GetAssignedReg() == 0) { + continue; + } + for (const auto &inner : ili->GetHoles()) { + if (inner.first <= li.GetFirstDef() && inner.second >= li.GetLastUse()) { + ili->SetLiChild(&li); + li.SetLiParent(ili); + li.SetAssignedReg(ili->GetAssignedReg()); + /* If assigned physical register is callee save register, set shouldSave false; */ + regno_t phyReg = kRinvalid; + if (li.GetRegType() == kRegTyInt || li.GetRegType() == kRegTyFloat) { + phyReg = li.GetAssignedReg(); + } else { + ASSERT(false, "FillInHole, Invalid register type"); + } + + if (AArch64Abi::IsAvailableReg(static_cast(phyReg)) && + AArch64Abi::IsCalleeSavedReg(static_cast(phyReg))) { + li.SetShouldSave(false); + } + return ili->GetAssignedReg(); + } else if (inner.first > li.GetLastUse()) { + break; + } + } + } + return 0; +} + +void LSRALinearScanRegAllocator::SetupIntervalRangesByOperand(Operand &opnd, const Insn &insn, uint32 blockFrom, + bool isDef, bool isUse) { + auto ®Opnd = static_cast(opnd); + RegType regType = regOpnd.GetRegisterType(); + if (regType != kRegTyCc && regType != kRegTyVary) { + regno_t regNO = regOpnd.GetRegisterNumber(); + if (regNO > kNArmRegisters) { + if (isDef) { + if (!liveIntervalsArray[regNO]->GetRanges().empty()) { + liveIntervalsArray[regNO]->GetRanges().front().first = insn.GetId(); + liveIntervalsArray[regNO]->UsePositionsInsert(insn.GetId()); + } + } + if (isUse) { + liveIntervalsArray[regNO]->AddRange(blockFrom, insn.GetId()); + liveIntervalsArray[regNO]->UsePositionsInsert(insn.GetId()); + } + } + } +} + +void LSRALinearScanRegAllocator::BuildIntervalRangesForEachOperand(const Insn &insn, uint32 blockFrom) { + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn).GetMachineOpcode()]; + uint32 opndNum = insn.GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn.GetOperand(i); + + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + Operand *base = memOpnd.GetBaseRegister(); + Operand *offset = memOpnd.GetIndexRegister(); + if (base != nullptr && base->IsRegister()) { + SetupIntervalRangesByOperand(*base, insn, blockFrom, false, 
true); + } + if (offset != nullptr && offset->IsRegister()) { + SetupIntervalRangesByOperand(*offset, insn, blockFrom, false, true); + } + } else if (opnd.IsRegister()) { + bool isDef = static_cast(md->operand[i])->IsRegDef(); + bool isUse = static_cast(md->operand[i])->IsRegUse(); + SetupIntervalRangesByOperand(opnd, insn, blockFrom, isDef, isUse); + } + } +} + +/* Support finding holes by searching for ranges where holes exist. */ +void LSRALinearScanRegAllocator::BuildIntervalRanges() { + uint32 bbIdx = sortedBBs.size(); + if (bbIdx == 0) { + return; + } + + do { + --bbIdx; + BB *bb = sortedBBs[bbIdx]; + if (bb->GetFirstInsn() == nullptr || bb->GetLastInsn() == nullptr) { + continue; + } + uint32 blockFrom = bb->GetFirstInsn()->GetId(); + uint32 blockTo = bb->GetLastInsn()->GetId() + 1; + + for (auto regNO : bb->GetLiveOutRegNO()) { + if (regNO < kNArmRegisters) { + /* Do not consider physical regs. */ + continue; + } + liveIntervalsArray[regNO]->AddRange(blockFrom, blockTo); + } + + FOR_BB_INSNS_REV(insn, bb) { + BuildIntervalRangesForEachOperand(*insn, blockFrom); + } + } while (bbIdx != 0); + + /* Build holes. */ + for (uint32 i = 0; i < cgFunc->GetMaxVReg(); ++i) { + LiveInterval *li = liveIntervalsArray[i]; + if (li == nullptr) { + continue; + } + if (li->GetRangesSize() < kMinRangesSize) { + continue; + } + + auto it = li->GetRanges().begin(); + auto itPrev = it++; + for (; it != li->GetRanges().end(); ++it) { + if (((*it).first - (*itPrev).second) > kMinRangesSize) { + li->HolesPushBack((*itPrev).second, (*it).first); + } + itPrev = it; + } + } +} + +/* Extend live interval with live-in info */ +void LSRALinearScanRegAllocator::UpdateLiveIntervalByLiveIn(const BB &bb, uint32 insnNum) { + for (const auto ®NO : bb.GetLiveInRegNO()) { + if (regNO < kNArmRegisters) { + /* Do not consider physical regs. 
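+ * (parameter registers that are live-in are picked up separately by
+ * UpdateParamLiveIntervalByLiveIn below)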
*/ + continue; + } + LiveInterval *liOuter = liveIntervalsArray[regNO]; + if (liOuter != nullptr || (bb.IsEmpty() && bb.GetId() != 1)) { + continue; + } + /* + * try-catch related + * Since it is livein but not seen before, its a use before def + */ + auto *li = cgFunc->GetMemoryPool()->New(alloc); + li->SetRegNO(regNO); + li->SetStackSlot(0xFFFFFFFF); + liveIntervalsArray[regNO] = li; + li->SetFirstDef(insnNum); + li->SetUseBeforeDef(true); + if (!bb.IsUnreachable() && static_cast(cgFunc)->GetCatchRegno() != regNO) { + if (bb.GetId() != 1) { + LogInfo::MapleLogger() << "ERROR: " << regNO << " use before def in bb " << bb.GetId() << " : " << + cgFunc->GetName() << "\n"; + ASSERT(false, "There should only be [use before def in bb 1], temporarily."); + } + LogInfo::MapleLogger() << "WARNING: " << regNO << " use before def in bb " << bb.GetId() << " : " << + cgFunc->GetName() << "\n"; + } + /* + * Need to insert to active list now, as live interval is + * conservatively to start at instruction 1 + */ + (void)active.insert(li); + UpdateLiveIntervalState(bb, *li); + } +} + +/* traverse live in regNO, for each live in regNO create a new liveinterval */ +void LSRALinearScanRegAllocator::UpdateParamLiveIntervalByLiveIn(const BB &bb, uint32 insnNum) { + for (const auto ®NO : bb.GetLiveInRegNO()) { + if (!AArch64Abi::IsParamReg(static_cast(regNO))) { + continue; + } + auto *li = cgFunc->GetMemoryPool()->New(alloc); + li->SetRegNO(regNO); + li->SetStackSlot(0xFFFFFFFF); + li->SetFirstDef(insnNum); + li->SetPhysUse(insnNum); + li->SetAssignedReg(regNO); + if (AArch64isa::IsGPRegister(static_cast(regNO))) { + li->SetRegType(kRegTyInt); + intParamQueue[regNO - R0].push_back(li); + } else { + li->SetRegType(kRegTyFloat); + fpParamQueue[regNO - V0].push_back(li); + } + } +} + +void LSRALinearScanRegAllocator::ComputeLiveIn(BB &bb, uint32 insnNum) { + UpdateLiveIntervalByLiveIn(bb, insnNum); + + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "bb(" << bb.GetId() << ")LIVEOUT:"; + for (const auto &liveOutRegNO : bb.GetLiveOutRegNO()) { + LogInfo::MapleLogger() << " " << liveOutRegNO; + } + LogInfo::MapleLogger() << ".\n"; + LogInfo::MapleLogger() << "bb(" << bb.GetId() << ")LIVEIN:"; + for (const auto &liveInRegNO : bb.GetLiveInRegNO()) { + LogInfo::MapleLogger() << " " << liveInRegNO; + } + LogInfo::MapleLogger() << ".\n"; + } + + regUsedInBBSz = (cgFunc->GetMaxVReg() / (sizeof(uint64) * k8ByteSize) + 1); + regUsedInBB = new uint64[regUsedInBBSz]; + CHECK_FATAL(regUsedInBB != nullptr, "alloc regUsedInBB memory failure."); + errno_t ret = memset_s(regUsedInBB, regUsedInBBSz * sizeof(uint64), 0, regUsedInBBSz * sizeof(uint64)); + if (ret != EOK) { + CHECK_FATAL(false, "call memset_s failed in LSRALinearScanRegAllocator::ComputeLiveInterval()"); + } + + if (bb.GetFirstInsn() == nullptr) { + return; + } + if (!bb.GetEhPreds().empty()) { + bb.InsertLiveInRegNO(R0); + bb.InsertLiveInRegNO(R1); + } + UpdateParamLiveIntervalByLiveIn(bb, insnNum); + if (!bb.GetEhPreds().empty()) { + bb.EraseLiveInRegNO(R0); + bb.EraseLiveInRegNO(R1); + } +} + +void LSRALinearScanRegAllocator::ComputeLiveOut(BB &bb, uint32 insnNum) { + /* + * traverse live out regNO + * for each live out regNO if the last corresponding live interval is created within this bb + * update this lastUse of li to the end of BB + */ + for (const auto ®NO : bb.GetLiveOutRegNO()) { + if (AArch64Abi::IsParamReg(static_cast(regNO))) { + LiveInterval *liOut = nullptr; + if (AArch64isa::IsGPRegister(static_cast(regNO))) { + if (intParamQueue[regNO - R0].empty()) { 
+ continue; + } + liOut = intParamQueue[regNO - R0].back(); + if (bb.GetFirstInsn() && liOut->GetFirstDef() >= bb.GetFirstInsn()->GetId()) { + liOut->SetPhysUse(insnNum); + } + } else { + if (fpParamQueue[regNO - V0].empty()) { + continue; + } + liOut = fpParamQueue[regNO - V0].back(); + if (bb.GetFirstInsn() && liOut->GetFirstDef() >= bb.GetFirstInsn()->GetId()) { + liOut->SetPhysUse(insnNum); + } + } + } + /* Extend live interval with live-out info */ + LiveInterval *li = liveIntervalsArray[regNO]; + if (li != nullptr && !bb.IsEmpty()) { + li->SetLastUse(bb.GetLastInsn()->GetId()); + UpdateLiveIntervalState(bb, *li); + } + } +} + +void LSRALinearScanRegAllocator::ComputeLiveIntervalForEachOperand(Insn &insn) { + uint32 numUses = 0; + const AArch64MD *md = &AArch64CG::kMd[static_cast(&insn)->GetMachineOpcode()]; + + /* + * we need to process src opnd first just in case the src/dest vreg are the same and the src vreg belongs to the + * last interval. + */ + int32 lastOpndId = insn.GetOperandSize() - 1; + for (int32 i = lastOpndId; i >= 0; --i) { + Operand &opnd = insn.GetOperand(i); + bool isDef = static_cast(md->operand[i])->IsRegDef(); + if (opnd.IsList()) { + auto &listOpnd = static_cast(opnd); + for (auto op : listOpnd.GetOperands()) { + SetupLiveInterval(*op, insn, isDef, numUses); + } + } else if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + Operand *base = memOpnd.GetBaseRegister(); + Operand *offset = memOpnd.GetIndexRegister(); + isDef = false; + /* + * ldr(156) (opnd0: reg:V34 class: [F]) (opnd1: Mem:literal: + * .LB_Ljava_2Fnio_2FByteBuffer_3B_7CgetDouble_7C_28_29D2) + */ + if (base != nullptr) { + SetupLiveInterval(*base, insn, isDef, numUses); + } + if (offset != nullptr) { + SetupLiveInterval(*offset, insn, isDef, numUses); + } + } else { + SetupLiveInterval(opnd, insn, isDef, numUses); + } + } + if (numUses >= AArch64Abi::kNormalUseOperandNum) { + needExtraSpillReg = true; + } +} + +/* Preference is to put bracket as 1st char of a newline */ +void LSRALinearScanRegAllocator::ComputeLiveInterval() { + /* + * initialize block ordering + * Can be either breadth first or depth first. 
+ * To avoid use before set, we prefer breadth first + */ + calleeUseCnt.resize(kAllRegNum); + liveIntervalsArray.resize(cgFunc->GetMaxVReg()); + /* LiveInterval queue for each param register */ + lastIntParamLi.resize(AArch64Abi::kNumIntParmRegs); + lastFpParamLi.resize(AArch64Abi::kNumFloatParmRegs); + uint32 insnNum = 1; + for (BB *bb : sortedBBs) { + ComputeLiveIn(*bb, insnNum); + FOR_BB_INSNS(insn, bb) { + insn->SetId(insnNum); + + /* skip comment and debug insn */ + if (insn->IsImmaterialInsn() || !insn->IsMachineInstruction()) { + continue; + } + if (insn->GetMachineOpcode() == MOP_clinit) { + auto &dst = static_cast(insn->GetOperand(0)); + RegOperand &phyOpnd = static_cast(cgFunc)->GetOrCreatePhysicalRegisterOperand( + static_cast(R30), dst.GetSize(), dst.GetRegisterType()); + insn->SetOperand(0, phyOpnd); + } + + if (insn->IsCall()) { + bool skipCall = false; + if (!insn->IsIndirectCall()) { + Operand *targetOpnd = insn->GetCallTargetOperand(); + ASSERT(targetOpnd != nullptr, "nullptr check"); + if (targetOpnd->IsFuncNameOpnd()) { + auto *target = static_cast(targetOpnd); + const MIRSymbol *funcSt = target->GetFunctionSymbol(); + ASSERT(funcSt->GetSKind() == kStFunc, "funcSt must be a kStFunc"); + if (funcSt->GetName() == "exit") { + skipCall = true; + } + } + } + + if (!skipCall) { + if (!insn->GetIsThrow() || !bb->GetEhSuccs().empty()) { + RecordCall(*insn); + } + } + } + + ComputeLiveIntervalForEachOperand(*insn); + + if (insn->IsCall()) { + auto *a64CGFunc = static_cast(cgFunc); + RegOperand ®R0 = a64CGFunc->GetOrCreatePhysicalRegisterOperand(R0, k64, kRegTyInt); + RegOperand ®V0 = a64CGFunc->GetOrCreatePhysicalRegisterOperand(V0, k64, kRegTyFloat); + /* handle return value for call insn */ + if (insn->GetRetType() == Insn::kRegInt) { + RecordPhysRegs(regR0, insnNum, true); + } else { + RecordPhysRegs(regV0, insnNum, true); + } + } + ++insnNum; + } + + ComputeLiveOut(*bb, insnNum); + + delete[] regUsedInBB; + regUsedInBB = nullptr; + maxInsnNum = insnNum - 1; /* insn_num started from 1 */ + } + + for (auto *li : liveIntervalsArray) { + if (li == nullptr || li->GetRegNO() == 0) { + continue; + } + if (li->GetIsCall() != nullptr || li->GetPhysUse()) { + continue; + } + if (li->GetLastUse() > li->GetFirstDef()) { + li->SetPriority(static_cast(li->GetRefCount()) / static_cast(li->GetLastUse() - li->GetFirstDef())); + } else { + li->SetPriority(static_cast(li->GetRefCount()) / static_cast(li->GetFirstDef() - li->GetLastUse())); + } + } + + if (LSRA_DUMP) { + PrintLiveIntervals(); + } +} + +/* + * Replace replace_reg with rename_reg. + * return true if there is a redefinition that needs to terminate the propagation. + */ +bool LSRALinearScanRegAllocator::PropagateRenameReg(Insn &nInsn, uint32 replaceReg, Operand &renameOpnd) { + uint32 renameReg = static_cast(&renameOpnd)->GetRegisterNumber(); + const AArch64MD *md = &AArch64CG::kMd[static_cast(&nInsn)->GetMachineOpcode()]; + int32 lastOpndId = nInsn.GetOperandSize() - 1; + for (int32 i = lastOpndId; i >= 0; --i) { + Operand &opnd = nInsn.GetOperand(i); + if (opnd.IsList()) { + /* call parameters */ + } else if (opnd.IsMemoryAccessOperand()) { + } else if (opnd.IsRegister()) { + bool isDef = static_cast(md->operand[i])->IsRegDef(); + auto ®Opnd = static_cast(opnd); + uint32 regCandidate = regOpnd.GetRegisterNumber(); + if (isDef) { + /* Continue if both replace_reg & rename_reg are not redefined. 
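+ * Illustrative effect on a made-up sequence (vreg names are hypothetical):
+ *   mov v200 <- x0        (replace_reg)
+ *   mov v201 <- v200      (rename_reg)
+ *   add v202 <- v200, #1  (this use of v200 is rewritten to v201)
+ * The walk stops (returns true) once v200 or v201 is defined again.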
*/ + if (regCandidate == replaceReg || regCandidate == renameReg) { + return true; + } + } else { + if (regCandidate == replaceReg) { + nInsn.SetOperand(i, renameOpnd); + } + } + } + } + return false; /* false == no redefinition */ +} + +/* + * Perform optimization. + * First propagate x0 in a bb. + * Second propagation see comment in function. + */ +void LSRALinearScanRegAllocator::PropagateX0() { + FOR_ALL_BB(bb, cgFunc) { + Insn *insn = bb->GetFirstInsn(); + while (insn != nullptr && !insn->IsMachineInstruction()) { + insn = insn->GetNext(); + } + + /* + * Propagate x0 from a call return value to a def of x0. + * This eliminates some local reloads under high register pressure, since + * the use has been replaced by x0. + */ + if (insn == nullptr || (insn->GetMachineOpcode() != MOP_xmovrr && insn->GetMachineOpcode() != MOP_wmovrr)) { + continue; + } + auto &movSrc = static_cast(insn->GetOperand(1)); + if (movSrc.GetRegisterNumber() != R0) { + continue; + } + + /* At this point the 1st insn is a mov from x0. */ + auto &movDst = static_cast(insn->GetOperand(0)); + uint32 replaceReg = movDst.GetRegisterNumber(); + + bool redefined = false; + Insn *renameInsn = nullptr; + Operand *renameOpnd = nullptr; + uint32 renameReg = 0; + for (Insn *nInsn = insn->GetNext(); nInsn != nullptr; nInsn = nInsn->GetNext()) { + if (!nInsn->IsMachineInstruction()) { + continue; + } + + if (nInsn->IsCall()) { + break; + } + + /* + * Will continue as long as the reg being replaced is not redefined. + * Does not need to check for x0 redefinition. The mov instruction src + * being replaced already defines x0 and will terminate this loop. + */ + uint32 numResults = nInsn->GetResultNum(); + const AArch64MD *md = &AArch64CG::kMd[static_cast(nInsn)->GetMachineOpcode()]; + if (numResults == 1) { + bool isDef = static_cast(md->operand[0])->IsRegDef(); + if (isDef) { + auto &opnd = static_cast(nInsn->GetOperand(0)); + if (opnd.GetRegisterNumber() == replaceReg) { + redefined = true; + } + } + } else if (numResults > 1) { + ASSERT(numResults <= nInsn->GetOperandSize(), "numResults shouldn't be more than the operands' size here"); + for (uint32 i = 0; i < numResults; ++i) { + bool isDef = static_cast(md->operand[i])->IsRegDef(); + if (isDef) { + auto &opnd = static_cast(nInsn->GetOperand(i)); + if (opnd.GetRegisterNumber() == replaceReg) { + redefined = true; + break; + } + } + } + } + + /* Look for move where src is the register equivalent to x0. */ + if (nInsn->GetMachineOpcode() != MOP_xmovrr && nInsn->GetMachineOpcode() != MOP_wmovrr) { + continue; + } + + Operand &src = nInsn->GetOperand(1); + auto &srcReg = static_cast(src); + if (srcReg.GetRegisterNumber() != replaceReg) { + if (redefined) { + break; + } + continue; + } + + /* Setup for the next optmization pattern. */ + Operand &dst = nInsn->GetOperand(0); + auto &dstReg = static_cast(dst); + if (dstReg.GetRegisterNumber() != R0) { + /* This is to set up for further propagation later. */ + if (srcReg.GetRegisterNumber() == replaceReg) { + if (renameInsn != nullptr) { + redefined = true; + break; + } else { + renameInsn = nInsn; + renameOpnd = &dst; + renameReg = dstReg.GetRegisterNumber(); + } + } + continue; + } + + if (redefined) { + break; + } + nInsn->SetOperand(1, movSrc); + break; + } + + if (redefined || renameInsn == nullptr) { + continue; + } + + /* + * Next pattern to help LSRA. Short cross bb live interval. + * Straight line code. Convert reg2 into bb local. 
+ * bb1 + * mov reg2 <- x0 => mov reg2 <- x0 + * mov reg1 <- reg2 mov reg1 <- reg2 + * call call + * bb2 : livein< reg1 reg2 > + * use reg2 use reg1 + * .... + * reg2 not liveout + * + * Can allocate caller register for reg2. + * + * Further propagation of very short live interval cross bb reg + */ + if (renameReg < kAllRegNum) { /* dont propagate physical reg */ + continue; + } + BB *nextBB = bb->GetNext(); + if (nextBB == nullptr) { + break; + } + if (bb->GetSuccs().size() != 1 || nextBB->GetPreds().size() != 1) { + continue; + } + if (bb->GetSuccs().front() != nextBB || nextBB->GetPreds().front() != bb) { + continue; + } + if (bb->GetLiveOutRegNO().find(replaceReg) == bb->GetLiveOutRegNO().end() || + bb->GetLiveOutRegNO().find(renameReg) == bb->GetLiveOutRegNO().end() || + nextBB->GetLiveOutRegNO().find(replaceReg) != nextBB->GetLiveOutRegNO().end()) { + continue; + } + + /* Replace replace_reg by rename_reg. */ + for (Insn *nInsn = renameInsn->GetNext(); nInsn != nullptr && nInsn != bb->GetLastInsn(); + nInsn = nInsn->GetNext()) { + if (!nInsn->IsMachineInstruction()) { + continue; + } + redefined = PropagateRenameReg(*nInsn, replaceReg, *renameOpnd); + if (redefined) { + break; + } + } + + if (redefined) { + continue; + } + + for (Insn *nInsn = nextBB->GetFirstInsn(); nInsn != nextBB->GetLastInsn(); nInsn = nInsn->GetNext()) { + if (!nInsn->IsMachineInstruction()) { + continue; + } + redefined = PropagateRenameReg(*nInsn, replaceReg, *renameOpnd); + if (redefined) { + break; + } + } + } +} + +/* A physical register is freed at the end of the live interval. Return to pool. */ +void LSRALinearScanRegAllocator::ReturnPregToSet(LiveInterval &li, uint32 preg) { + if (preg == 0) { + return; + } + if (li.GetRegType() == kRegTyInt) { + preg -= R0; + } else if (li.GetRegType() == kRegTyFloat) { + preg -= V0; + } else { + ASSERT(false, "ReturnPregToSet: Invalid reg type"); + } + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "\trestoring preg " << preg << " as allocatable\n"; + } + uint32 mask = 1u << preg; + if (preg == kSpecialIntSpillReg && li.GetStackSlot() == 0xFFFFFFFF) { + /* this reg is temporary used for liveinterval which lastUse-firstDef == 1 */ + return; + } + if (li.GetRegType() == kRegTyInt) { + if (intCallerMask & mask) { + (void)intCallerRegSet.insert(preg); + } else if (intCalleeMask & mask) { + (void)intCalleeRegSet.insert(preg); + } else if (intParamMask & mask) { + (void)intParamRegSet.insert(preg); + } else { + ASSERT(false, "ReturnPregToSet: Unknown caller/callee type"); + } + } else if (fpCallerMask & mask) { + (void)fpCallerRegSet.insert(preg); + } else if (fpCalleeMask & mask) { + (void)fpCalleeRegSet.insert(preg); + } else if (fpParamMask & mask) { + (void)fpParamRegSet.insert(preg); + } else { + ASSERT(false, "ReturnPregToSet invalid physical register"); + } +} + +/* A physical register is removed from allocation as it is assigned. 
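+ * This is the mirror of ReturnPregToSet above: the same caller/callee/param
+ * mask test picks the set, but the register index is erased instead of
+ * re-inserted, e.g. (illustrative) for an integer preg p:
+ *   mask = 1u << (p - R0); if (intParamMask & mask) intParamRegSet.erase(p - R0);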
*/ +void LSRALinearScanRegAllocator::ReleasePregToSet(LiveInterval &li, uint32 preg) { + if (preg == 0) { + return; + } + if (li.GetRegType() == kRegTyInt) { + preg -= R0; + } else if (li.GetRegType() == kRegTyFloat) { + preg -= V0; + } else { + ASSERT(false, "ReleasePregToSet: Invalid reg type"); + } + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "\treleasing preg " << preg << " as allocatable\n"; + } + uint32 mask = 1u << preg; + if (preg == kSpecialIntSpillReg && li.GetStackSlot() == 0xFFFFFFFF) { + /* this reg is temporary used for liveinterval which lastUse-firstDef == 1 */ + return; + } + if (li.GetRegType() == kRegTyInt) { + if (intCallerMask & mask) { + intCallerRegSet.erase(preg); + } else if (intCalleeMask & mask) { + intCalleeRegSet.erase(preg); + } else if (intParamMask & mask) { + intParamRegSet.erase(preg); + } else { + ASSERT(false, "ReleasePregToSet: Unknown caller/callee type"); + } + } else if (fpCallerMask & mask) { + fpCallerRegSet.erase(preg); + } else if (fpCalleeMask & mask) { + fpCalleeRegSet.erase(preg); + } else if (fpParamMask & mask) { + fpParamRegSet.erase(preg); + } else { + ASSERT(false, "ReleasePregToSet invalid physical register"); + } +} + +/* update active in retire */ +void LSRALinearScanRegAllocator::UpdateActiveAtRetirement(uint32 insnID) { + /* Retire live intervals from active list */ + MapleSet::iterator it; + for (it = active.begin(); it != active.end(); /* erase will update */) { + auto *li = static_cast(*it); + if (li->GetLastUse() > insnID) { + break; + } + /* Is it phys reg? */ + if ((li->GetRegNO() >= R0) && (li->GetRegNO() <= R7)) { + if (li->GetPhysUse() != 0 && li->GetPhysUse() <= insnID) { + it = active.erase(it); + if (li->GetPhysUse() != 0) { + ReturnPregToSet(*li, li->GetRegNO()); + } + if (LSRA_DUMP) { + PrintLiveInterval(*li, "\tRemoving phys_reg li\n"); + } + } else { + ++it; + } + continue; + } else if ((li->GetRegNO() >= V0) && (li->GetRegNO() <= V7)) { + if (li->GetPhysUse() != 0 && li->GetPhysUse() <= insnID) { + it = active.erase(it); + if (li->GetPhysUse() != 0) { + ReturnPregToSet(*li, li->GetRegNO()); + } + if (LSRA_DUMP) { + PrintLiveInterval(*li, "\tRemoving phys_reg li\n"); + } + } else { + ++it; + } + continue; + } + /* + * live interval ended for this reg in active + * release physical reg assigned to free reg pool + */ + if (li->GetLiParent() != nullptr) { + li->SetLiParentChild(nullptr); + li->SetLiParent(nullptr); + } else { + ReturnPregToSet(*li, li->GetAssignedReg()); + } + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "Removing " << "(" << li->GetAssignedReg() << ")" << "from regset\n"; + PrintLiveInterval(*li, "\tRemoving virt_reg li\n"); + } + it = active.erase(it); + } +} + +/* Remove a live interval from 'active' list. */ +void LSRALinearScanRegAllocator::RetireFromActive(const Insn &insn) { + if ((insn.GetMachineOpcode() == MOP_adrp_ldr && insn.GetNext() && + insn.GetNext()->GetMachineOpcode() == MOP_clinit_tail) || + (insn.GetMachineOpcode() == MOP_clinit_tail)) { + /* Cannot spill for clinit pair */ + } else if (spillAll) { + return; + } + uint32 insnID = insn.GetId(); + /* + * active list is sorted based on increasing lastUse + * any operand whose use is greater than current + * instruction number is still in use. + * If the use is less than or equal to instruction number + * then it is possible to retire this live interval and + * reclaim the physical register associated with it. 
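+ * Worked example with made-up numbers: if active holds { li_a (lastUse 12),
+ * li_b (lastUse 20) } and the current insn id is 15, li_a is retired and its
+ * preg goes back to the free pool, while li_b (20 > 15) ends the scan.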
+ */ + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "RetireFromActive instr_num " << insnID << "\n"; + } + /* Retire call from call queue */ + for (auto it = callList.begin(); it != callList.end();) { + auto *li = static_cast(*it); + if (li->GetFirstDef() > insnID) { + break; + } + callList.pop_front(); + /* at here, it is invalidated */ + it = callList.begin(); + } + + for (uint32 i = 0; i < intParamQueue.size(); ++i) { + /* push back the param not yet use <- as only one is popped, just push it back again */ + if (lastIntParamLi[i] != nullptr) { + intParamQueue[i].push_front(lastIntParamLi[i]); + (void)intParamRegSet.insert(i); + lastIntParamLi[i] = nullptr; + } + if (lastFpParamLi[i] != nullptr) { + fpParamQueue[i].push_front(lastFpParamLi[i]); + (void)fpParamRegSet.insert(i); + lastFpParamLi[i] = nullptr; + } + } + + UpdateActiveAtRetirement(insnID); +} + +/* the return value is a physical reg */ +uint32 LSRALinearScanRegAllocator::GetRegFromSet(MapleSet &set, regno_t offset, LiveInterval &li, + regno_t forcedReg) { + uint32 regNO; + if (forcedReg) { + /* forced_reg is a caller save reg */ + regNO = forcedReg; + } else { + CHECK(!set.empty(), "set is null in LSRALinearScanRegAllocator::GetRegFromSet"); + regNO = *(set.begin()); + } + set.erase(regNO); + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "\tAssign " << regNO << "\n"; + } + regNO += offset; /* Mapping into Maplecg reg */ + li.SetAssignedReg(regNO); + if (LSRA_DUMP) { + PrintRegSet(set, "Reg Set AFTER"); + PrintLiveInterval(li, "LiveInterval after assignment"); + } + return regNO; +} + +/* + * Handle adrp register assignment. Use the same register for the next + * instruction. + */ +uint32 LSRALinearScanRegAllocator::AssignSpecialPhysRegPattern(Insn &insn, LiveInterval &li) { + MOperator opCode = insn.GetMachineOpcode(); + if (opCode != MOP_xadrp) { + return 0; + } + Insn *nInsn = insn.GetNext(); + if (nInsn == nullptr || !nInsn->IsMachineInstruction() || nInsn->IsDMBInsn()) { + return 0; + } + + const AArch64MD *md = &AArch64CG::kMd[static_cast(nInsn)->GetMachineOpcode()]; + bool isDef = md->GetOperand(0)->IsRegDef(); + if (!isDef) { + return 0; + } + Operand &opnd = nInsn->GetOperand(0); + if (!opnd.IsRegister()) { + return 0; + } + auto ®Opnd = static_cast(opnd); + if (!regOpnd.IsPhysicalRegister()) { + return 0; + } + uint32 regNO = regOpnd.GetRegisterNumber(); + if (!(regNO >= R0 && regNO <= R7)) { + return 0; + } + + /* next insn's dest is a physical param reg 'regNO' */ + bool match = false; + uint32 opndNum = nInsn->GetOperandSize(); + for (uint32 i = 1; i < opndNum; ++i) { + Operand &src = nInsn->GetOperand(i); + if (src.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(src); + Operand *base = memOpnd.GetBaseRegister(); + if (base != nullptr) { + auto *regSrc = static_cast(base); + uint32 srcRegNO = regSrc->GetRegisterNumber(); + if (li.GetRegNO() == srcRegNO) { + match = true; + break; + } + } + Operand *offset = memOpnd.GetIndexRegister(); + if (offset != nullptr) { + auto *regSrc = static_cast(offset); + uint32 srcRegNO = regSrc->GetRegisterNumber(); + if (li.GetRegNO() == srcRegNO) { + match = true; + break; + } + } + } else if (src.IsRegister()) { + auto ®Src = static_cast(src); + uint32 srcRegNO = regSrc.GetRegisterNumber(); + if (li.GetRegNO() == srcRegNO) { + bool srcIsDef = static_cast(md->operand[i])->IsRegDef(); + if (srcIsDef) { + break; + } + match = true; + break; + } + } + } + if (match && li.GetLastUse() > nInsn->GetId()) { + return 0; + } + /* dest of adrp is src of next insn */ + if (match) { + 
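+    /*
+     * e.g. (illustrative):
+     *   adrp xN, sym               <- current insn, defines li
+     *   ldr  x1, [xN, #:lo12:sym]  <- next insn writes param reg x1
+     * so the adrp result is forced into x1 as well (the forcedReg argument).
+     */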
return GetRegFromSet(intParamRegSet, R0, li, regNO - R0); + } + return 0; +} + +uint32 LSRALinearScanRegAllocator::FindAvailablePhyRegByFastAlloc(LiveInterval &li) { + uint32 regNO = 0; + if (li.GetRegType() == kRegTyInt) { + if (!intCalleeRegSet.empty()) { + regNO = GetRegFromSet(intCalleeRegSet, R0, li); + li.SetShouldSave(false); + } else if (!intCallerRegSet.empty()) { + regNO = GetRegFromSet(intCallerRegSet, R0, li); + li.SetShouldSave(true); + } else { + li.SetShouldSave(false); + } + } else if (li.GetRegType() == kRegTyFloat) { + if (!fpCalleeRegSet.empty()) { + regNO = GetRegFromSet(fpCalleeRegSet, V0, li); + li.SetShouldSave(false); + } else if (!fpCallerRegSet.empty()) { + regNO = GetRegFromSet(fpCallerRegSet, V0, li); + li.SetShouldSave(true); + } else { + li.SetShouldSave(false); + } + } + return regNO; +} + +bool LSRALinearScanRegAllocator::NeedSaveAcrossCall(LiveInterval &li) { + bool saveAcrossCall = false; + for (const auto *cli : callList) { + if (cli->GetFirstDef() > li.GetLastUse()) { + break; + } + /* Determine if live interval crosses the call */ + if ((cli->GetFirstDef() > li.GetFirstDef()) && (cli->GetFirstDef() < li.GetLastUse())) { + li.SetShouldSave(true); + /* Need to spill/fill around this call */ + saveAcrossCall = true; + break; + } + } + return saveAcrossCall; +} + +uint32 LSRALinearScanRegAllocator::FindAvailablePhyReg(LiveInterval &li, Insn &insn, bool isIntReg) { + uint32 regNO = 0; + MapleSet &callerRegSet = isIntReg ? intCallerRegSet : fpCallerRegSet; + MapleSet &calleeRegSet = isIntReg ? intCalleeRegSet : fpCalleeRegSet; + MapleSet ¶mRegSet = isIntReg ? intParamRegSet : fpParamRegSet; + AArch64reg reg0 = isIntReg ? R0 : V0; + + /* See if register is live accross a call */ + bool saveAcrossCall = NeedSaveAcrossCall(li); + if (saveAcrossCall) { + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "\t\tlive interval crosses a call\n"; + } + if (regNO == 0) { + if (!li.IsInCatch() && li.IsAllInCleanupOrFirstBB() == false && !calleeRegSet.empty()) { + /* call in live interval, use callee if available */ + regNO = GetRegFromSet(calleeRegSet, reg0, li); + /* Since it is callee saved, no need to continue search */ + li.SetShouldSave(false); + } else if (li.IsMultiUseInBB()) { + /* + * allocate caller save if there are multiple uses in one bb + * else it is no different from spilling + */ + if (!callerRegSet.empty()) { + regNO = GetRegFromSet(callerRegSet, reg0, li); + } else if (!paramRegSet.empty()) { + regNO = GetRegFromSet(paramRegSet, reg0, li); + } + } + } + if (regNO == 0) { + /* No register left for allocation */ + regNO = FillInHole(li); + if (regNO == 0) { + li.SetShouldSave(false); + } + } + return regNO; + } else { + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "\t\tlive interval does not cross a call\n"; + } + if (isIntReg) { + regNO = AssignSpecialPhysRegPattern(insn, li); + if (regNO != 0) { + return regNO; + } + } + if (!paramRegSet.empty()) { + regNO = GetRegFromSet(paramRegSet, reg0, li); + } + if (regNO == 0) { + if (!callerRegSet.empty()) { + regNO = GetRegFromSet(callerRegSet, reg0, li); + } else if (!calleeRegSet.empty()) { + regNO = GetRegFromSet(calleeRegSet, reg0, li); + } else { + regNO = FillInHole(li); + } + } + return regNO; + } +} + +/* Return a phys register number for the live interval. 
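+ * Preference order, summarizing the helper above: an interval that crosses a
+ * call tries a callee-saved register first (no caller-save spill/fill around
+ * the call), then caller-saved/param registers if it has multiple uses in one
+ * bb, then a hole in a longer interval; an interval that does not cross a
+ * call tries the adrp pattern, then param, caller-saved, callee-saved and
+ * finally a hole.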
*/ +uint32 LSRALinearScanRegAllocator::FindAvailablePhyReg(LiveInterval &li, Insn &insn) { + if (fastAlloc) { + return FindAvailablePhyRegByFastAlloc(li); + } + uint32 regNO = 0; + if (li.GetRegType() == kRegTyInt) { + regNO = FindAvailablePhyReg(li, insn, true); + } else if (li.GetRegType() == kRegTyFloat) { + regNO = FindAvailablePhyReg(li, insn, false); + } + return regNO; +} + +/* Spill and reload for caller saved registers. */ +void LSRALinearScanRegAllocator::InsertCallerSave(Insn &insn, Operand &opnd, bool isDef) { + auto ®Opnd = static_cast(opnd); + uint32 vRegNO = regOpnd.GetRegisterNumber(); + if (vRegNO >= liveIntervalsArray.size()) { + CHECK_FATAL(false, "index out of range in LSRALinearScanRegAllocator::InsertCallerSave"); + } + LiveInterval *rli = liveIntervalsArray[vRegNO]; + RegType regType = regOpnd.GetRegisterType(); + + isSpillZero = false; + if (!isDef) { + uint32 mask; + uint32 regBase; + if (regType == kRegTyInt) { + mask = intBBDefMask; + regBase = R0; + } else { + mask = fpBBDefMask; + regBase = V0; + } + if (mask & (1u << (rli->GetAssignedReg() - regBase))) { + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "InsertCallerSave " << rli->GetAssignedReg() << " skipping due to local def\n"; + } + return; + } + } + + if (!rli->IsShouldSave()) { + return; + } + + uint32 regSize = regOpnd.GetSize(); + PrimType spType; + + if (regType == kRegTyInt) { + spType = (regSize <= k32BitSize) ? PTY_i32 : PTY_i64; + intBBDefMask |= (1u << (rli->GetAssignedReg() - R0)); + } else { + spType = (regSize <= k32BitSize) ? PTY_f32 : PTY_f64; + fpBBDefMask |= (1u << (rli->GetAssignedReg() - V0)); + } + + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "InsertCallerSave " << vRegNO << "\n"; + } + + if (!isDef && !rli->IsCallerSpilled()) { + LogInfo::MapleLogger() << "WARNING: " << vRegNO << " caller restore without spill in bb " + << insn.GetBB()->GetId() << " : " << cgFunc->GetName() << "\n"; + } + rli->SetIsCallerSpilled(true); + + if (isDef) { + MOperator opCode = insn.GetMachineOpcode(); + if (opCode == MOP_xmovri64 || opCode == MOP_xmovri32) { + Operand &opnd1 = insn.GetOperand(1); + auto &imm = static_cast(opnd1); + if (imm.IsZero()) { + isSpillZero = true; + } + } else if (opCode == MOP_wmovrr || opCode == MOP_xmovrr) { + auto &opnd1 = static_cast(insn.GetOperand(1)); + if (opnd1.IsZeroRegister()) { + isSpillZero = true; + } + } + if (isSpillZero) { + /* This has to be a caller register */ + intBBDefMask &= ~(1u << (rli->GetAssignedReg() - R0)); + } + } + + auto *a64CGFunc = static_cast(cgFunc); + CG *cg = a64CGFunc->GetCG(); + MemOperand *memOpnd = nullptr; + RegOperand *phyOpnd = nullptr; + + if (isSpillZero) { + phyOpnd = &AArch64RegOperand::GetZeroRegister(regSize); + } else { + phyOpnd = &a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(rli->GetAssignedReg()), regSize, + regType); + } + + std::string comment; + bool isOutOfRange = false; + if (isDef) { + memOpnd = GetSpillMem(vRegNO, true, insn, static_cast(intSpillRegSet[0] + R0), isOutOfRange); + Insn &stInsn = cg->BuildInstruction(a64CGFunc->PickStInsn(regSize, spType), *phyOpnd, *memOpnd); + comment = " SPILL for caller_save " + std::to_string(vRegNO); + ++callerSaveSpillCount; + if (rli->GetLastUse() == insn.GetId()) { + a64CGFunc->FreeSpillRegMem(vRegNO); + comment += " end"; + } + stInsn.SetComment(comment); + if (isOutOfRange) { + insn.GetBB()->InsertInsnAfter(*insn.GetNext(), stInsn); + } else { + insn.GetBB()->InsertInsnAfter(insn, stInsn); + } + } else { + memOpnd = GetSpillMem(vRegNO, false, insn, 
static_cast(intSpillRegSet[0] + R0), isOutOfRange); + Insn &ldInsn = cg->BuildInstruction(a64CGFunc->PickLdInsn(regSize, spType), *phyOpnd, *memOpnd); + comment = " RELOAD for caller_save " + std::to_string(vRegNO); + ++callerSaveReloadCount; + if (rli->GetLastUse() == insn.GetId()) { + a64CGFunc->FreeSpillRegMem(vRegNO); + comment += " end"; + } + ldInsn.SetComment(comment); + insn.GetBB()->InsertInsnBefore(insn, ldInsn); + } +} + +/* Shell function to find a physical register for an operand. */ +RegOperand *LSRALinearScanRegAllocator::AssignPhysRegs(Operand &opnd, Insn &insn) { + auto ®Opnd = static_cast(opnd); + uint32 vRegNO = regOpnd.GetRegisterNumber(); + RegType regType = regOpnd.GetRegisterType(); + if (vRegNO >= liveIntervalsArray.size()) { + CHECK_FATAL(false, "index out of range in LSRALinearScanRegAllocator::AssignPhysRegs"); + } + LiveInterval *li = liveIntervalsArray[vRegNO]; + + bool doNotSpill = false; + if (li->IsMustAllocate() || (insn.GetMachineOpcode() == MOP_adrp_ldr && insn.GetNext() && + insn.GetNext()->GetMachineOpcode() == MOP_clinit_tail) || + (insn.GetMachineOpcode() == MOP_clinit_tail)) { + /* Cannot spill for clinit pair */ + doNotSpill = true; + } else if (spillAll) { + return nullptr; + } else if (IN_SPILL_RANGE) { + return nullptr; + } + + if (doNotSpill) { + li->SetMustAllocate(true); + } + + /* + * if only def, no use, then should assign a new phyreg, + * otherwise, there may be conflict + */ + if (li->GetAssignedReg() != 0 && (li->GetLastUse() != 0 || li->GetPhysUse() != 0)) { + if (AArch64Abi::IsCalleeSavedReg(static_cast(li->GetAssignedReg()))) { + ++calleeUseCnt[li->GetAssignedReg()]; + } + if (li->GetStackSlot() == 0xFFFFFFFF) { + return &static_cast(cgFunc)->GetOrCreatePhysicalRegisterOperand( + static_cast(li->GetAssignedReg()), opnd.GetSize(), regType); + } else { + /* need to reload */ + return nullptr; + } + } + + /* pre spilled: */ + if (li->GetStackSlot() != 0xFFFFFFFF) { + return nullptr; + } + + if (LSRA_DUMP) { + uint32 activeSz = active.size(); + LogInfo::MapleLogger() << "\tAssignPhysRegs-active_sz " << activeSz << "\n"; + } + + uint32 regNO = FindAvailablePhyReg(*li, insn); + if (regNO != 0) { + if (AArch64Abi::IsCalleeSavedReg(static_cast(regNO))) { + if (!CGOptions::DoCalleeToSpill()) { + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "\tCallee-save register for save/restore in prologue/epilogue: " << regNO << "\n"; + } + static_cast(cgFunc)->AddtoCalleeSaved(static_cast(regNO)); + } + ++calleeUseCnt[regNO]; + } + return &static_cast(cgFunc)->GetOrCreatePhysicalRegisterOperand( + static_cast(li->GetAssignedReg()), opnd.GetSize(), regType); + } + + return nullptr; +} + +MemOperand *LSRALinearScanRegAllocator::GetSpillMem(uint32 vRegNO, bool isDest, Insn &insn, AArch64reg regNO, + bool &isOutOfRange) { + auto *a64CGFunc = static_cast(cgFunc); + MemOperand *memOpnd = a64CGFunc->GetOrCreatSpillMem(vRegNO); + return (a64CGFunc->AdjustMemOperandIfOffsetOutOfRange(memOpnd, vRegNO, isDest, insn, regNO, isOutOfRange)); +} + +/* Set a vreg in live interval as being marked for spill. 
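+ * Only the sentinel kSpilled is recorded in the interval's stack-slot field
+ * here; the actual slot is created lazily by GetOrCreatSpillMem (via
+ * GetSpillMem above) when the spill/reload instructions are emitted.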
*/ +void LSRALinearScanRegAllocator::SetOperandSpill(Operand &opnd) { + auto ®Opnd = static_cast(opnd); + uint32 regNO = regOpnd.GetRegisterNumber(); + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "SetOperandSpill " << regNO; + LogInfo::MapleLogger() << "(" << liveIntervalsArray[regNO]->GetFirstAcrossedCall(); + LogInfo::MapleLogger() << ", refCount " << liveIntervalsArray[regNO]->GetRefCount() << ")\n"; + } + + ASSERT(regNO < liveIntervalsArray.size(), + "index out of vector size in LSRALinearScanRegAllocator::SetOperandSpill"); + LiveInterval *li = liveIntervalsArray[regNO]; + li->SetStackSlot(kSpilled); +} + +/* + * Generate spill/reload for an operand. + * spill_idx : one of 3 phys regs set aside for the purpose of spills. + */ +void LSRALinearScanRegAllocator::SpillOperand(Insn &insn, Operand &opnd, bool isDef, uint32 spillIdx) { + /* + * Insert spill (def) and fill (use) instructions for the operand. + * Keep track of the 'slot' (base 0). The actual slot on the stack + * will be some 'base_slot_offset' + 'slot' off FP. + * For simplification, entire 64bit register is spilled/filled. + * + * For example, a virtual register home 'slot' on the stack is location 5. + * This represents a 64bit slot (8bytes). The base_slot_offset + * from the base 'slot' determined by whoever is added, off FP. + * stack address is ( FP - (5 * 8) + base_slot_offset ) + * So the algorithm is simple, for each virtual register that is not + * allocated, it has to have a home address on the stack (a slot). + * A class variable is used, start from 0, increment by 1. + * Since LiveInterval already represent unique regNO information, + * just add a slot number to it. Subsequent reference to a regNO + * will either get an allocated physical register or a slot number + * for computing the stack location. + * + * This function will also determine the operand to be a def or use. + * For def, spill instruction(s) is appended after the insn. + * For use, spill instruction(s) is prepended before the insn. + * Use FP - (slot# *8) for now. Will recompute if base_slot_offset + * is not 0. + * + * The total number of slots used will be used to compute the stack + * frame size. This will require some interface external to LSRA. + * + * For normal instruction, two spill regs should be enough. The caller + * controls which ones to use. + * For more complex operations, need to break down the instruction. + * eg. store v1 -> [v2 + v3] // 3 regs needed + * => p1 <- v2 // address part 1 + * p2 <- v3 // address part 2 + * p1 <- p1 + p2 // freeing up p2 + * p2 <- v1 + * store p2 -> [p1] + * or we can allocate more registers to the spill register set + * For store multiple, need to break it down into two or more instr. 
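+ * With the illustrative numbers above (home slot 5, 8-byte slots, zero
+ * base_slot_offset) the home address is simply FP - 40.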
+ */ + auto ®Opnd = static_cast(opnd); + uint32 regNO = regOpnd.GetRegisterNumber(); + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "SpillOperand " << regNO << "\n"; + } + + isSpillZero = false; + + regno_t spReg; + PrimType spType; + CHECK_FATAL(regNO < liveIntervalsArray.size(), "index out of range in LSRALinearScanRegAllocator::SpillOperand"); + LiveInterval *li = liveIntervalsArray[regNO]; + ASSERT(!li->IsShouldSave(), "SpillOperand: Should not be caller"); + uint32 regSize = regOpnd.GetSize(); + auto *a64CGFunc = static_cast(cgFunc); + CG *cg = a64CGFunc->GetCG(); + RegType regType = regOpnd.GetRegisterType(); + + if (isDef) { + MOperator opCode = insn.GetMachineOpcode(); + if (opCode == MOP_xmovri64 || opCode == MOP_xmovri32) { + Operand &opnd1 = insn.GetOperand(1); + auto &imm = static_cast(opnd1); + if (imm.IsZero()) { + isSpillZero = true; + } + } else if (opCode == MOP_wmovrr || opCode == MOP_xmovrr) { + auto &opnd1 = static_cast(insn.GetOperand(1)); + if (opnd1.IsZeroRegister()) { + isSpillZero = true; + } + } + } + + if (li->GetRegType() == kRegTyInt) { + ASSERT((spillIdx < intSpillRegSet.size()), "SpillOperand: ran out int spill reg"); + spReg = intSpillRegSet[spillIdx] + R0; + spType = (regSize <= k32BitSize) ? PTY_i32 : PTY_i64; + } else if (li->GetRegType() == kRegTyFloat) { + ASSERT((spillIdx < fpSpillRegSet.size()), "SpillOperand: ran out fp spill reg"); + spReg = fpSpillRegSet[spillIdx] + V0; + spType = (regSize <= k32BitSize) ? PTY_f32 : PTY_f64; + } else { + CHECK_FATAL(false, "SpillOperand: Should be int or float type"); + } + + bool isOutOfRange = false; + RegOperand *phyOpnd = nullptr; + if (isSpillZero) { + phyOpnd = &AArch64RegOperand::GetZeroRegister(regSize); + } else { + phyOpnd = &a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(spReg), regSize, regType); + } + li->SetAssignedReg(phyOpnd->GetRegisterNumber()); + + MemOperand *memOpnd = nullptr; + if (isDef) { + /* + * Need to assign spReg (one of the two spill reg) to the destination of the insn. + * spill_vreg <- opn1 op opn2 + * to + * spReg <- opn1 op opn2 + * store spReg -> spillmem + */ + li->SetStackSlot(kSpilled); + + ++spillCount; + memOpnd = GetSpillMem(regNO, true, insn, static_cast(intSpillRegSet[spillIdx + 1] + R0), isOutOfRange); + Insn &stInsn = cg->BuildInstruction(a64CGFunc->PickStInsn(regSize, spType), *phyOpnd, *memOpnd); + std::string comment = " SPILL vreg:" + std::to_string(regNO); + if (li->GetLastUse() == insn.GetId()) { + a64CGFunc->FreeSpillRegMem(regNO); + comment += " end"; + } + stInsn.SetComment(comment); + if (isOutOfRange) { + insn.GetBB()->InsertInsnAfter(*insn.GetNext(), stInsn); + } else { + insn.GetBB()->InsertInsnAfter(insn, stInsn); + } + } else { + /* Here, reverse of isDef, change either opn1 or opn2 to the spReg. 
*/ + if (li->GetStackSlot() == 0xFFFFFFFF) { + LogInfo::MapleLogger() << "WARNING: " << regNO << " assigned " << li->GetAssignedReg() << + " restore without spill in bb " << insn.GetBB()->GetId() << " : " << + cgFunc->GetName() << "\n"; + } + ++reloadCount; + memOpnd = GetSpillMem(regNO, false, insn, static_cast(intSpillRegSet[spillIdx] + R0), isOutOfRange); + Insn &ldInsn = cg->BuildInstruction(a64CGFunc->PickLdInsn(regSize, spType), *phyOpnd, *memOpnd); + std::string comment = " RELOAD vreg" + std::to_string(regNO); + if (li->GetLastUse() == insn.GetId()) { + a64CGFunc->FreeSpillRegMem(regNO); + comment += " end"; + } + ldInsn.SetComment(comment); + insn.GetBB()->InsertInsnBefore(insn, ldInsn); + } +} + +RegOperand *LSRALinearScanRegAllocator::HandleSpillForInsn(Insn &insn, Operand &opnd) { + /* choose the lowest priority li to spill */ + auto ®Opnd = static_cast(opnd); + uint32 regNO = regOpnd.GetRegisterNumber(); + ASSERT(regNO < liveIntervalsArray.size(), + "index out of range of MapleVector in LSRALinearScanRegAllocator::HandleSpillForInsn"); + LiveInterval *li = liveIntervalsArray[regNO]; + RegType regType = regOpnd.GetRegisterType(); + LiveInterval *spillLi = nullptr; + FindLowestPrioInActive(spillLi, regType, true); + + /* + * compare spill_li with current li + * spill_li is null and li->SetStackSlot(Spilled) when the li is spilled due to LiveIntervalAnalysis + */ + if (spillLi == nullptr || spillLi->GetLiParent() || spillLi->GetLiChild() || li->GetStackSlot() == kSpilled || + li->GetFirstDef() != insn.GetId() || li->GetPriority() < spillLi->GetPriority() || + li->GetRefCount() < spillLi->GetRefCount() || + !(AArch64Abi::IsCalleeSavedReg(static_cast(spillLi->GetAssignedReg())))) { + /* spill current li */ + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "Flexible Spill: still spill " << li->GetRegNO() << ".\n"; + } + SetOperandSpill(opnd); + return nullptr; + } + + ReturnPregToSet(*spillLi, spillLi->GetAssignedReg()); + RegOperand *newOpnd = AssignPhysRegs(opnd, insn); + if (newOpnd == nullptr) { + ReleasePregToSet(*spillLi, spillLi->GetAssignedReg()); + SetOperandSpill(opnd); + return nullptr; + } + + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "Flexible Spill: " << spillLi->GetRegNO() << " instead of " << li->GetRegNO() << ".\n"; + PrintLiveInterval(*spillLi, "TO spill: "); + PrintLiveInterval(*li, "Instead of: "); + } + + /* spill this live interval */ + active.erase(itFinded); + spillLi->SetStackSlot(kSpilled); + + return newOpnd; +} + +bool LSRALinearScanRegAllocator::OpndNeedAllocation(Insn &insn, Operand &opnd, bool isDef, uint32 insnNum) { + if (!opnd.IsRegister()) { + return false; + } + auto ®Opnd = static_cast(opnd); + RegType regType = regOpnd.GetRegisterType(); + uint32 regNO = regOpnd.GetRegisterNumber(); + if (regType == kRegTyCc || regType == kRegTyVary) { + return false; + } + if (IsUntouchableReg(regNO) || regOpnd.IsConstReg()) { + return false; + } + if (regOpnd.IsPhysicalRegister()) { + if ((insn.GetMachineOpcode() == MOP_adrp_ldr && insn.GetNext() && + insn.GetNext()->GetMachineOpcode() == MOP_clinit_tail) || + (insn.GetMachineOpcode() == MOP_clinit_tail)) { + /* Cannot spill for clinit pair */ + } else if (spillAll) { + return false; + } + if (isDef) { + if (regType == kRegTyInt) { + if (regNO > R7 || intParamQueue[regNO - R0].empty()) { + return false; + } + LiveInterval *li = intParamQueue[regNO - R0].front(); + /* li may have been inserted by InsertParamToActive */ + if (li->GetFirstDef() == insnNum) { + intParamRegSet.erase(regNO - R0); + 
(void)active.insert(li); + ASSERT((regNO - R0) < intParamQueue.size(), + "index out of range in LSRALinearScanRegAllocator::OpndNeedAllocation"); + intParamQueue[regNO - R0].pop_front(); + } + } else { + if (regNO > V7 || fpParamQueue[regNO - V0].empty()) { + return false; + } + LiveInterval *li = fpParamQueue[regNO - V0].front(); + /* li may have been inserted by InsertParamToActive */ + if (li->GetFirstDef() == insnNum) { + fpParamRegSet.erase(regNO - V0); + (void)active.insert(li); + fpParamQueue[regNO - V0].pop_front(); + } + } + } + return false; + } + /* This is a virtual register */ + return true; +} + +void LSRALinearScanRegAllocator::InsertParamToActive(Operand &opnd) { + auto ®Opnd = static_cast(opnd); + uint32 regNO = regOpnd.GetRegisterNumber(); + CHECK_FATAL(regNO < liveIntervalsArray.size(), + "index out of range in LSRALinearScanRegAllocator::InsertParamToActive"); + LiveInterval *li = liveIntervalsArray[regNO]; + /* Search for parameter registers that is in the live range to insert into queue */ + if (li->GetRegType() == kRegTyInt) { + for (uint32 i = 0; i < intParamQueue.size(); ++i) { + if (intParamQueue[i].empty()) { + continue; + } + LiveInterval *pli = intParamQueue[i].front(); + do { + if ((pli->GetFirstDef() <= li->GetFirstDef()) && (pli->GetPhysUse() <= li->GetFirstDef())) { + /* just discard it */ + intParamQueue[i].pop_front(); + if (intParamQueue[i].empty()) { + break; + } + pli = intParamQueue[i].front(); + } else { + break; + } + } while (true); + if ((pli->GetFirstDef() < li->GetLastUse()) && (pli->GetPhysUse() > li->GetFirstDef())) { + if (intParamRegSet.find(i) != intParamRegSet.end()) { + /* reserve this param register and active the its first use */ + lastIntParamLi[i] = pli; + intParamRegSet.erase(i); + intParamQueue[i].pop_front(); + } + } + } + } else { + ASSERT((li->GetRegType() == kRegTyFloat), "InsertParamToActive: Incorrect register type"); + for (uint32 i = 0; i < fpParamQueue.size(); ++i) { + if (fpParamQueue[i].empty()) { + continue; + } + LiveInterval *pli = fpParamQueue[i].front(); + do { + if ((pli->GetFirstDef() <= li->GetFirstDef()) && (pli->GetPhysUse() <= li->GetFirstDef())) { + /* just discard it */ + fpParamQueue[i].pop_front(); + if (fpParamQueue[i].empty()) { + break; + } + pli = fpParamQueue[i].front(); + } else { + break; + } + } while (true); + if ((pli->GetFirstDef() < li->GetLastUse()) && (pli->GetPhysUse() > li->GetFirstDef())) { + if (fpParamRegSet.find(i) != fpParamRegSet.end()) { + lastFpParamLi[i] = pli; + fpParamRegSet.erase(i); + fpParamQueue[i].pop_front(); + } + } + } + } +} + +/* Insert a live interval into the 'active' list. 
*/ +void LSRALinearScanRegAllocator::InsertToActive(Operand &opnd, uint32 insnNum) { + auto ®Opnd = static_cast(opnd); + uint32 regNO = regOpnd.GetRegisterNumber(); + CHECK_FATAL(regNO < liveIntervalsArray.size(), + "index out of range in LSRALinearScanRegAllocator::InsertToActive"); + LiveInterval *li = liveIntervalsArray[regNO]; + if (li->GetLastUse() <= insnNum) { + /* insert first, and retire later, then the assigned reg can be released */ + (void)active.insert(li); + if (LSRA_DUMP) { + PrintLiveInterval(*li, "LiveInterval is skip due to past insn num --- opt to remove redunant insn"); + } + return; + } + (void)active.insert(li); +} + +/* find the lowest one and erase it from active */ +void LSRALinearScanRegAllocator::FindLowestPrioInActive(LiveInterval *&targetLi, RegType regType, bool startRA) { + float lowestPrio = 100.0; + bool found = false; + MapleSet::iterator it; + MapleSet::iterator lowestIt; + for (it = active.begin(); it != active.end(); ++it) { + auto *li = static_cast(*it); + if (startRA && li->GetPhysUse() != 0) { + continue; + } + if (li->GetPriority() < lowestPrio && li->GetRegType() == regType) { + lowestPrio = li->GetPriority(); + lowestIt = it; + found = true; + } + } + if (found) { + targetLi = *lowestIt; + itFinded = lowestIt; + } +} + +/* Calculate the weight of a live interval for pre-spill and flexible spill */ +void LSRALinearScanRegAllocator::LiveIntervalAnalysis() { + for (uint32 bbIdx = 0; bbIdx < sortedBBs.size(); ++bbIdx) { + BB *bb = sortedBBs[bbIdx]; + + /* 1. calculate live interfere */ + FOR_BB_INSNS(insn, bb) { + if (insn->IsImmaterialInsn() || !insn->IsMachineInstruction() || insn->GetId() == 0) { + /* New instruction inserted by reg alloc (ie spill) */ + continue; + } + + /* simple retire from active */ + MapleSet::iterator it; + for (it = active.begin(); it != active.end(); /* erase will update */) { + auto *li = static_cast(*it); + if (li->GetLastUse() > insn->GetId()) { + break; + } + it = active.erase(it); + } + + /* simple insert to active */ + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + bool isDef = static_cast(md->operand[i])->IsRegDef(); + if (isDef) { + auto ®Opnd = static_cast(opnd); + if (regOpnd.IsVirtualRegister() && regOpnd.GetRegisterType() != kRegTyCc) { + uint32 regNO = regOpnd.GetRegisterNumber(); + LiveInterval *li = liveIntervalsArray[regNO]; + if (li->GetFirstDef() == insn->GetId()) { + (void)active.insert(li); + } + } + } + } + + /* get interfere info */ + uint32 interNum = active.size(); + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "In insn " << insn->GetId() << ", " << interNum << " overlap live intervals.\n"; + } + + /* 2. analysis which to spill */ + while (interNum > CGOptions::GetOverlapNum()) { + LiveInterval *lowestLi = nullptr; + FindLowestPrioInActive(lowestLi); + if (lowestLi != nullptr) { + if (LSRA_DUMP) { + PrintLiveInterval(*lowestLi, "Pre spilled: "); + } + lowestLi->SetStackSlot(kSpilled); + active.erase(itFinded); + interNum = active.size(); + } else { + break; + } + } + } + } + + active.clear(); +} + +/* Iterate through the operands of an instruction for allocation. */ +void LSRALinearScanRegAllocator::AssignPhysRegsForInsn(Insn &insn) { + const AArch64MD *md = &AArch64CG::kMd[static_cast(&insn)->GetMachineOpcode()]; + + /* At the beginning of the landing pad, we handle the x1, x2 as if they are implicitly defined. 
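+ * (these are the first two integer parameter queues, intParamQueue[0] and
+ * intParamQueue[1], matching the R0/R1 that ComputeLiveIn adds to the
+ * live-in set of blocks with EH predecessors)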
*/ + if (!insn.GetBB()->GetEhPreds().empty() && &insn == insn.GetBB()->GetFirstInsn()) { + if (!intParamQueue[0].empty()) { + LiveInterval *li = intParamQueue[0].front(); + if (li->GetFirstDef() == insn.GetId()) { + intParamRegSet.erase(li->GetAssignedReg() - R0); + (void)active.insert(li); + intParamQueue[0].pop_front(); + } + } + + if (!intParamQueue[1].empty()) { + LiveInterval *li = intParamQueue[1].front(); + if (li->GetFirstDef() == insn.GetId()) { + intParamRegSet.erase(li->GetAssignedReg() - R0); + (void)active.insert(li); + intParamQueue[1].pop_front(); + } + } + } + + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "active in " << insn.GetId() << " :"; + PrintActiveListSimple(); + } + uint32 opndNum = insn.GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn.GetOperand(i); + bool isDef = static_cast(md->operand[i])->IsRegDef(); + RegOperand *newOpnd = nullptr; + if (opnd.IsList()) { + /* For arm32, not arm64 */ + } else if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + Operand *base = memOpnd.GetBaseRegister(); + Operand *offset = memOpnd.GetIndexRegister(); + isDef = false; + if (base != nullptr) { + if (OpndNeedAllocation(insn, *base, isDef, insn.GetId())) { + newOpnd = AssignPhysRegs(*base, insn); + if (newOpnd == nullptr) { + SetOperandSpill(*base); + } + /* add ASSERT here. */ + } + } + if (offset != nullptr) { + if (!OpndNeedAllocation(insn, *offset, isDef, insn.GetId())) { + continue; + } + newOpnd = AssignPhysRegs(*offset, insn); + if (newOpnd == nullptr) { + SetOperandSpill(*offset); + } + } + } else { + if (!OpndNeedAllocation(insn, opnd, isDef, insn.GetId())) { + continue; + } + if (isDef && !fastAlloc) { + InsertParamToActive(opnd); + } + newOpnd = AssignPhysRegs(opnd, insn); + if (newOpnd != nullptr) { + if (isDef) { + InsertToActive(opnd, insn.GetId()); + } + } else { + /* + * If dest and both src are spilled, src will use both of the + * spill registers. + * dest can use any spill reg, choose 0 + */ + if (isDef) { + newOpnd = HandleSpillForInsn(insn, opnd); + if (newOpnd != nullptr) { + InsertToActive(opnd, insn.GetId()); + } + } else { + SetOperandSpill(opnd); + } + } + } + } +} + +/* + * Create an operand with physical register assigned, or a spill register + * in the case where a physical register cannot be assigned. + */ +RegOperand *LSRALinearScanRegAllocator::GetReplaceOpnd(Insn &insn, Operand &opnd, uint32 &spillIdx, bool isDef) { + if (!opnd.IsRegister()) { + return nullptr; + } + const auto *regOpnd = static_cast(&opnd); + + uint32 vRegNO = regOpnd->GetRegisterNumber(); + RegType regType = regOpnd->GetRegisterType(); + if (regType == kRegTyCc || regType == kRegTyVary) { + return nullptr; + } + if (IsUntouchableReg(vRegNO) || regOpnd->IsConstReg()) { + return nullptr; + } + if (regOpnd->IsPhysicalRegister()) { + return nullptr; + } + + ASSERT(vRegNO < liveIntervalsArray.size(), + "index out of range of MapleVector in LSRALinearScanRegAllocator::GetReplaceOpnd"); + LiveInterval *li = liveIntervalsArray[vRegNO]; + + bool addCalleeToSaved = true; + regno_t regNO = li->GetAssignedReg(); + bool isCalleeReg = AArch64Abi::IsCalleeSavedReg(static_cast(regNO)); + if (CGOptions::DoCalleeToSpill() && + /* prolog can use stp, so try to estimate if spill callee should be done. 
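+ * Rough idea: callee-saved registers are saved/restored in pairs (stp/ldp),
+ * so an odd number of used callee regs wastes half a pair; spilling one
+ * cheap interval (one def, one use) instead can bring the count back to
+ * even (see CheckSpillCallee below).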
*/ + ((shouldOptIntCallee && li->GetRegType() == kRegTyInt) || + (shouldOptFpCallee && li->GetRegType() == kRegTyFloat))) { + if (isCalleeReg) { + /* Determine if it is worth keeping callee */ + const uint32 spillResult = 1; + const uint32 spillReference = 2; + if (calleeUseCnt[regNO] == kRegNum2 && li->GetResultCount() == spillResult && + li->GetRefCount() == spillReference) { + /* This callee is allocated for one def and one use */ + li->SetStackSlot(kSpilled); + li->SetAssignedReg(0); + addCalleeToSaved = false; + } + } + } + if (isCalleeReg && addCalleeToSaved) { + static_cast(cgFunc)->AddtoCalleeSaved(static_cast(regNO)); + } + + if (li->IsShouldSave()) { + if (insn.GetMachineOpcode() == MOP_adrp_ldr && insn.GetNext() && + insn.GetNext()->GetMachineOpcode() == MOP_clinit_tail) { + /* clinit pair */ + li->SetAssignedReg(R16); + } else if (insn.GetMachineOpcode() == MOP_clinit_tail && insn.GetPrev() && + insn.GetPrev()->GetMachineOpcode() == MOP_adrp_ldr) { + isDef = true; + InsertCallerSave(insn, opnd, isDef); + } else { + InsertCallerSave(insn, opnd, isDef); + } + } else if (li->GetStackSlot() == kSpilled) { + /* Determine if spill can reside in localref space */ + if ((insn.GetMachineOpcode() == MOP_adrp_ldr && insn.GetNext() && + insn.GetNext()->GetMachineOpcode() == MOP_clinit_tail)) { + /* clinit pair */ + li->SetAssignedReg(R16); + } else if (insn.GetMachineOpcode() == MOP_clinit_tail && insn.GetPrev() && + insn.GetPrev()->GetMachineOpcode() == MOP_adrp_ldr) { + isDef = true; + spillIdx = 0; + SpillOperand(insn, opnd, isDef, spillIdx); + } else { + if (isDef) { + spillIdx = 0; + } + SpillOperand(insn, opnd, isDef, spillIdx); + if (!isDef) { + ++spillIdx; + } + } + } + + RegOperand &phyOpnd = static_cast(cgFunc)->GetOrCreatePhysicalRegisterOperand( + static_cast(li->GetAssignedReg()), opnd.GetSize(), regType); + + return &phyOpnd; +} + +/* Try to estimate if spill callee should be done based on even/odd for stp in prolog. */ +void LSRALinearScanRegAllocator::CheckSpillCallee() { + if (CGOptions::DoCalleeToSpill()) { + uint32 pairCnt = 0; + for (size_t idx = 0; idx < sizeof(uint32); ++idx) { + if ((intCalleeMask & (1ULL << idx)) != 0 && calleeUseCnt[idx] != 0) { + ++pairCnt; + } + } + if ((pairCnt & 0x01) != 0) { + shouldOptIntCallee = true; + } + + for (size_t idx = 0; idx < sizeof(uint32); ++idx) { + if ((fpCalleeMask & (1ULL << idx)) != 0 && calleeUseCnt[idx] != 0) { + ++pairCnt; + } + } + if ((pairCnt & 0x01) != 0) { + shouldOptFpCallee = true; + } + } +} + +/* Iterate through all instructions and change the vreg to preg. 
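+ * Source operands are rewritten before destination operands, and memory
+ * operands are cloned before their base/index registers are replaced, so every
+ * instruction ends up referencing physical registers, with spill/reload code
+ * generated where needed via GetReplaceOpnd.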
*/ +void LSRALinearScanRegAllocator::FinalizeRegisters() { + CheckSpillCallee(); + for (BB *bb : sortedBBs) { + intBBDefMask = 0; + fpBBDefMask = 0; + + FOR_BB_INSNS(insn, bb) { + if (insn->IsImmaterialInsn() || !insn->IsMachineInstruction() || insn->GetId() == 0) { + continue; + } + + if (insn->IsCall()) { + intBBDefMask = 0; + fpBBDefMask = 0; + } + + uint32 spillIdx = 0; + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + + /* Handle source opernads first */ + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + ASSERT(md->operand[i] != nullptr, "pointer is null in LSRALinearScanRegAllocator::FinalizeRegisters"); + bool isDef = static_cast(md->operand[i])->IsRegDef(); + if (isDef) { + continue; + } + RegOperand *phyOpnd = nullptr; + if (opnd.IsList()) { + /* For arm32, not arm64 */ + } else if (opnd.IsMemoryAccessOperand()) { + auto *memOpnd = + static_cast(static_cast(opnd).Clone(*cgFunc->GetMemoryPool())); + ASSERT(memOpnd != nullptr, "memopnd is null in LSRALinearScanRegAllocator::FinalizeRegisters"); + insn->SetOperand(i, *memOpnd); + Operand *base = memOpnd->GetBaseRegister(); + Operand *offset = memOpnd->GetIndexRegister(); + if (base != nullptr) { + phyOpnd = GetReplaceOpnd(*insn, *base, spillIdx, false); + if (phyOpnd != nullptr) { + memOpnd->SetBaseRegister(*phyOpnd); + } + } + if (offset != nullptr) { + phyOpnd = GetReplaceOpnd(*insn, *offset, spillIdx, false); + if (phyOpnd != nullptr) { + memOpnd->SetIndexRegister(*phyOpnd); + } + } + } else { + phyOpnd = GetReplaceOpnd(*insn, opnd, spillIdx, false); + if (phyOpnd != nullptr) { + insn->SetOperand(i, *phyOpnd); + } + } + } + /* Handle dest opernads last */ + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + bool isDef = static_cast(md->operand[i])->IsRegDef(); + if (!isDef) { + continue; + } + isSpillZero = false; + RegOperand *phyOpnd = GetReplaceOpnd(*insn, opnd, spillIdx, true); + if (phyOpnd != nullptr) { + insn->SetOperand(i, *phyOpnd); + if (isSpillZero) { + insn->GetBB()->RemoveInsn(*insn); + } + } + } + } + } +} + +void LSRALinearScanRegAllocator::SetAllocMode() { + if (CGOptions::IsFastAlloc()) { + if (CGOptions::GetFastAllocMode() == 0) { + fastAlloc = true; + } else { + spillAll = true; + } + /* In-Range spill range can still be specified (only works with --dump-func=). 
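+ * Fast-alloc mode 0 only switches on fastAlloc; any other fast-alloc mode
+ * spills everything (spillAll). Functions whose basic-block count exceeds the
+ * LSRA threshold are also forced into fastAlloc to keep allocation time bounded.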
*/ + } else if (cgFunc->NumBBs() > CGOptions::GetLSRABBOptSize()) { + /* instruction size is checked in ComputeLieveInterval() */ + fastAlloc = true; + } + + if (LSRA_DUMP) { + if (fastAlloc) { + LogInfo::MapleLogger() << "fastAlloc mode on\n"; + } + if (spillAll) { + LogInfo::MapleLogger() << "spillAll mode on\n"; + } + } +} + +void LSRALinearScanRegAllocator::LinearScanRegAllocator() { + if (LSRA_DUMP) { + PrintParamQueue("Initial param queue"); + PrintCallQueue("Initial call queue"); + } + /* handle param register */ + for (auto &intParam : intParamQueue) { + if (!intParam.empty() && intParam.front()->GetFirstDef() == 0) { + LiveInterval *li = intParam.front(); + intParamRegSet.erase(li->GetAssignedReg() - R0); + (void)active.insert(li); + intParam.pop_front(); + } + } + for (auto &fpParam : fpParamQueue) { + if (!fpParam.empty() && fpParam.front()->GetFirstDef() == 0) { + LiveInterval *li = fpParam.front(); + fpParamRegSet.erase(li->GetAssignedReg() - V0); + (void)active.insert(li); + fpParam.pop_front(); + } + } + + for (BB *bb : sortedBBs) { + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "======New BB=====" << bb->GetId() << " " << std::hex << bb << std::dec << "\n"; + } + FOR_BB_INSNS(insn, bb) { + if (!insn->IsMachineInstruction()) { + continue; + } + if (insn->GetId() == 0) { + /* New instruction inserted by reg alloc (ie spill) */ + continue; + } + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "======New Insn=====" << insn->GetId() << " " << insn->GetBB()->GetId() << "\n"; + insn->Dump(); + } + RetireFromActive(*insn); +#ifdef LSRA_DEBUG + DebugCheckActiveList(); +#endif + AssignPhysRegsForInsn(*insn); + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "======After Alloc=====" << insn->GetId() << " " << insn->GetBB()->GetId() << "\n"; + insn->Dump(); + } + } + } +} + +/* Main entrance for the LSRA register allocator. 
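+ * The overall flow below is: compute the block order and live intervals, run
+ * LiveIntervalAnalysis for pre-spilling, build interval ranges, choose the
+ * allocation mode, run the linear scan itself, and finally rewrite virtual
+ * registers to physical ones in FinalizeRegisters.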
*/ +bool LSRALinearScanRegAllocator::AllocateRegisters() { + auto *a64CGFunc = static_cast(cgFunc); + /* + * we store both FP/LR if using FP or if not using FP, but func has a call + * Using FP, record it for saving + */ + a64CGFunc->AddtoCalleeSaved(RFP); + a64CGFunc->AddtoCalleeSaved(RLR); + a64CGFunc->NoteFPLRAddedToCalleeSavedList(); + + if (LSRA_DUMP) { + const MIRModule &mirModule = cgFunc->GetMirModule(); + DotGenerator::GenerateDot("RA", *cgFunc, mirModule); + DotGenerator::GenerateDot("RAe", *cgFunc, mirModule, true); + } + + if (CGOptions::DoPreLSRAOpt()) { + PropagateX0(); + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "******** CG IR After PreLSRA: *********" << '\n'; + cgFunc->DumpCGIR(); + } + } + + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "Entering LinearScanRegAllocator\n"; + } + + ComputeBlockOrder(); + + ComputeLiveInterval(); + +#ifdef LSRA_GRAPH + PrintLiveRanges(); +#endif + + LiveIntervalAnalysis(); + + InitFreeRegPool(); + + BuildIntervalRanges(); + + SetAllocMode(); + + LinearScanRegAllocator(); + + FinalizeRegisters(); + + if (LSRA_DUMP) { + LogInfo::MapleLogger() << "Total " << spillCount << " spillCount in " << cgFunc->GetName() << " \n"; + LogInfo::MapleLogger() << "Total " << reloadCount << " reloadCount\n"; + LogInfo::MapleLogger() << "Total " << "(" << spillCount << "+ " << callerSaveSpillCount << ") = " << + (spillCount + callerSaveSpillCount) << " SPILL\n"; + LogInfo::MapleLogger() << "Total " << "(" << reloadCount << "+ " << callerSaveReloadCount << ") = " << + (reloadCount + callerSaveReloadCount) << " RELOAD\n"; + } + + return true; +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_memlayout.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_memlayout.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7c078832212c6b07476d5d6f76af07939d546150 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_memlayout.cpp @@ -0,0 +1,446 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#include "riscv64_memlayout.h" +#include "riscv64_cgfunc.h" +#include "riscv64_rt.h" +#include "becommon.h" +#include "mir_nodes.h" + +namespace maplebe { +using namespace maple; + +/* + * Returns stack space required for a call + * which is used to pass arguments that cannot be + * passed through registers + */ +uint32 AArch64MemLayout::ComputeStackSpaceRequirementForCall(StmtNode &stmt, int32 &aggCopySize, bool isIcall) { + /* instantiate a parm locator */ + ParmLocator parmLocator(be); + uint32 sizeOfArgsToStkPass = 0; + size_t i = 0; + /* An indirect call's first operand is the invocation target */ + if (isIcall) { + ++i; + } + + if (std::strcmp(stmt.GetOpName(), "call") == 0) { + CallNode *callNode = static_cast(&stmt); + MIRFunction *fn = GlobalTables::GetFunctionTable().GetFunctionFromPuidx(callNode->GetPUIdx()); + MIRSymbol *symbol = be.GetMIRModule().CurFunction()->GetLocalOrGlobalSymbol(fn->GetStIdx(), false); + if (symbol->GetName() == "MCC_CallFastNative" || symbol->GetName() == "MCC_CallFastNativeExt" || + symbol->GetName() == "MCC_CallSlowNative0" || symbol->GetName() == "MCC_CallSlowNative1" || + symbol->GetName() == "MCC_CallSlowNative2" || symbol->GetName() == "MCC_CallSlowNative3" || + symbol->GetName() == "MCC_CallSlowNative4" || symbol->GetName() == "MCC_CallSlowNative5" || + symbol->GetName() == "MCC_CallSlowNative6" || symbol->GetName() == "MCC_CallSlowNative7" || + symbol->GetName() == "MCC_CallSlowNative8" || symbol->GetName() == "MCC_CallSlowNativeExt") { + ++i; + } + } + + aggCopySize = 0; + for (uint32 anum = 0; i < stmt.NumOpnds(); ++i, ++anum) { + BaseNode *opnd = stmt.Opnd(i); + MIRType *ty = nullptr; + if (opnd->GetPrimType() != PTY_agg) { + ty = GlobalTables::GetTypeTable().GetTypeTable()[static_cast(opnd->GetPrimType())]; + } else { + Opcode opndOpcode = opnd->GetOpCode(); + ASSERT(opndOpcode == OP_dread || opndOpcode == OP_iread, "opndOpcode should be OP_dread or OP_iread"); + if (opndOpcode == OP_dread) { + DreadNode *dread = static_cast(opnd); + MIRSymbol *sym = be.GetMIRModule().CurFunction()->GetLocalOrGlobalSymbol(dread->GetStIdx()); + ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(sym->GetTyIdx()); + if (dread->GetFieldID() != 0) { + ASSERT(ty->GetKind() == kTypeStruct || ty->GetKind() == kTypeClass, "expect struct or class"); + if (ty->GetKind() == kTypeStruct) { + ty = static_cast(ty)->GetFieldType(dread->GetFieldID()); + } else { + ty = static_cast(ty)->GetFieldType(dread->GetFieldID()); + } + } + } else { + /* OP_iread */ + IreadNode *iread = static_cast(opnd); + ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(iread->GetTyIdx()); + ASSERT(ty->GetKind() == kTypePointer, "expect pointer"); + ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(static_cast(ty)->GetPointedTyIdx()); + if (iread->GetFieldID() != 0) { + ASSERT(ty->GetKind() == kTypeStruct || ty->GetKind() == kTypeClass, "expect struct or class"); + if (ty->GetKind() == kTypeStruct) { + ty = static_cast(ty)->GetFieldType(iread->GetFieldID()); + } else { + ty = static_cast(ty)->GetFieldType(iread->GetFieldID()); + } + } + } + } + PLocInfo ploc; + aggCopySize += parmLocator.LocateNextParm(*ty, ploc, anum == 0); + if (ploc.reg0 != 0) { + continue; /* passed in register, so no effect on actual area */ + } + sizeOfArgsToStkPass = RoundUp(ploc.memOffset + ploc.memSize, kSizeOfPtr); + } + return sizeOfArgsToStkPass; +} + +void AArch64MemLayout::SetSizeAlignForTypeIdx(uint32 typeIdx, uint32 &size, uint32 &align) const { + if (be.GetTypeSize(typeIdx) > k16ByteSize) { + /* size > 16 is 
passed on stack, the formal is just a pointer to the copy on stack. */ + align = kSizeOfPtr; + size = kSizeOfPtr; + } else { + align = be.GetTypeAlign(typeIdx); + size = be.GetTypeSize(typeIdx); + } +} + +void AArch64MemLayout::SetSegmentSize(AArch64SymbolAlloc &symbolAlloc, MemSegment &segment, uint32 typeIdx) { + uint32 size; + uint32 align; + SetSizeAlignForTypeIdx(typeIdx, size, align); + segment.SetSize(static_cast(RoundUp(static_cast(segment.GetSize()), align))); + symbolAlloc.SetOffset(segment.GetSize()); + segment.SetSize(segment.GetSize() + static_cast(size)); + segment.SetSize(static_cast(RoundUp(static_cast(segment.GetSize()), kSizeOfPtr))); +} + +void AArch64MemLayout::LayoutVarargParams() { + uint32 nIntRegs = 0; + uint32 nFpRegs = 0; + ParmLocator parmlocator(be); + PLocInfo ploc; + MIRFunction *func = mirFunction; + if (be.GetMIRModule().IsCModule() && func->GetAttr(FUNCATTR_varargs)) { + for (uint32 i = 0; i < func->GetFormalCount(); i++) { + if (i == 0) { + if (be.HasFuncReturnType(*func)) { + TyIdx tidx = be.GetFuncReturnType(*func); + if (be.GetTypeSize(tidx.GetIdx()) <= k16ByteSize) { + continue; + } + } + } + MIRType *ty = func->GetNthParamType(i); + parmlocator.LocateNextParm(*ty, ploc); + if (ploc.reg0 != kRinvalid) { + if (ploc.reg0 >= R0 && ploc.reg0 <= R7) { + nIntRegs++; + } else if (ploc.reg0 >= V0 && ploc.reg0 <= V7) { + nFpRegs++; + } + } + if (ploc.reg1 != kRinvalid) { + if (ploc.reg1 >= R0 && ploc.reg1 <= R7) { + nIntRegs++; + } else if (ploc.reg1 >= V0 && ploc.reg1 <= V7) { + nFpRegs++; + } + } + } + SetSizeOfGRSaveArea((k8BitSize - nIntRegs) * kSizeOfPtr); + SetSizeOfVRSaveArea((k8BitSize - nFpRegs) * kSizeOfPtr * k2ByteSize); + } +} + +void AArch64MemLayout::LayoutFormalParams() { + ParmLocator parmLocator(be); + PLocInfo ploc; + for (size_t i = 0; i < mirFunction->GetFormalCount(); ++i) { + MIRSymbol *sym = mirFunction->GetFormal(i); + bool noStackPara = false; + MIRType *ty = mirFunction->GetNthParamType(i); + uint32 ptyIdx = ty->GetTypeIndex(); + parmLocator.LocateNextParm(*ty, ploc, i == 0); + uint32 stIndex = sym->GetStIndex(); + AArch64SymbolAlloc *symLoc = memAllocator->GetMemPool()->New(); + SetSymAllocInfo(stIndex, *symLoc); + if (ploc.reg0 != kRinvalid) { /* register */ + symLoc->SetRegisters(ploc.reg0, ploc.reg1); + if (mirFunction->GetNthParamAttr(i).GetAttr(ATTR_localrefvar)) { + symLoc->SetMemSegment(segRefLocals); + SetSegmentSize(*symLoc, segRefLocals, ptyIdx); + } else if (!sym->IsPreg()) { + uint32 size; + uint32 align; + SetSizeAlignForTypeIdx(ptyIdx, size, align); + symLoc->SetMemSegment(GetSegArgsRegPassed()); + /* the type's alignment requirement may be smaller than a registser's byte size */ + segArgsRegPassed.SetSize(RoundUp(segArgsRegPassed.GetSize(), align)); + symLoc->SetOffset(segArgsRegPassed.GetSize()); + segArgsRegPassed.SetSize(segArgsRegPassed.GetSize() + size); + } + noStackPara = true; + } else { /* stack */ + uint32 size; + uint32 align; + SetSizeAlignForTypeIdx(ptyIdx, size, align); + symLoc->SetMemSegment(GetSegArgsStkPassed()); + segArgsStkPassed.SetSize(RoundUp(segArgsStkPassed.GetSize(), align)); + symLoc->SetOffset(segArgsStkPassed.GetSize()); + segArgsStkPassed.SetSize(segArgsStkPassed.GetSize() + size); + /* We need it as dictated by the AArch64 ABI $5.4.2 C12 */ + segArgsStkPassed.SetSize(RoundUp(segArgsStkPassed.GetSize(), kSizeOfPtr)); + if (mirFunction->GetNthParamAttr(i).GetAttr(ATTR_localrefvar)) { + SetLocalRegLocInfo(sym->GetStIdx(), *symLoc); + AArch64SymbolAlloc *symLoc1 = 
memAllocator->GetMemPool()->New(); + symLoc1->SetMemSegment(segRefLocals); + SetSegmentSize(*symLoc1, segRefLocals, ptyIdx); + SetSymAllocInfo(stIndex, *symLoc1); + } + } + } +} + +void AArch64MemLayout::LayoutLocalVariales(std::vector &tempVar, std::vector &returnDelays) { + uint32 symTabSize = mirFunction->GetSymTab()->GetSymbolTableSize(); + for (uint32 i = 0; i < symTabSize; ++i) { + MIRSymbol *sym = mirFunction->GetSymTab()->GetSymbolFromStIdx(i); + if (sym == nullptr || sym->GetStorageClass() != kScAuto || sym->IsDeleted()) { + continue; + } + uint32 stIndex = sym->GetStIndex(); + TyIdx tyIdx = sym->GetTyIdx(); + AArch64SymbolAlloc *symLoc = memAllocator->GetMemPool()->New(); + SetSymAllocInfo(stIndex, *symLoc); + CHECK_FATAL(!symLoc->IsRegister(), "expect not register"); + + if (sym->IsRefType()) { + if (mirFunction->GetRetRefSym().find(sym) != mirFunction->GetRetRefSym().end()) { + /* try to put ret_ref at the end of segRefLocals */ + returnDelays.emplace_back(sym); + continue; + } + symLoc->SetMemSegment(segRefLocals); + segRefLocals.SetSize(RoundUp(segRefLocals.GetSize(), be.GetTypeAlign(tyIdx))); + symLoc->SetOffset(segRefLocals.GetSize()); + segRefLocals.SetSize(segRefLocals.GetSize() + be.GetTypeSize(tyIdx)); + } else { + if (sym->GetName() == "__EARetTemp__" || + sym->GetName().substr(0, kEARetTempNameSize) == "__EATemp__") { + tempVar.emplace_back(sym); + continue; + } + symLoc->SetMemSegment(segLocals); + segLocals.SetSize(RoundUp(segLocals.GetSize(), be.GetTypeAlign(tyIdx))); + symLoc->SetOffset(segLocals.GetSize()); + segLocals.SetSize(segLocals.GetSize() + be.GetTypeSize(tyIdx)); + } + } +} + +void AArch64MemLayout::LayoutEAVariales(std::vector &tempVar) { + for (auto sym : tempVar) { + uint32 stIndex = sym->GetStIndex(); + TyIdx tyIdx = sym->GetTyIdx(); + AArch64SymbolAlloc *symLoc = memAllocator->GetMemPool()->New(); + SetSymAllocInfo(stIndex, *symLoc); + ASSERT(!symLoc->IsRegister(), "expect not register"); + symLoc->SetMemSegment(segRefLocals); + segRefLocals.SetSize(RoundUp(segRefLocals.GetSize(), be.GetTypeAlign(tyIdx))); + symLoc->SetOffset(segRefLocals.GetSize()); + segRefLocals.SetSize(segRefLocals.GetSize() + be.GetTypeSize(tyIdx)); + } +} + +void AArch64MemLayout::LayoutReturnRef(std::vector &returnDelays, + int32 &structCopySize, int32 &maxParmStackSize) { + for (auto sym : returnDelays) { + uint32 stIndex = sym->GetStIndex(); + TyIdx tyIdx = sym->GetTyIdx(); + AArch64SymbolAlloc *symLoc = memAllocator->GetMemPool()->New(); + SetSymAllocInfo(stIndex, *symLoc); + ASSERT(!symLoc->IsRegister(), "expect not register"); + + ASSERT(sym->IsRefType(), "expect reftype "); + symLoc->SetMemSegment(segRefLocals); + segRefLocals.SetSize(RoundUp(segRefLocals.GetSize(), be.GetTypeAlign(tyIdx))); + symLoc->SetOffset(segRefLocals.GetSize()); + segRefLocals.SetSize(segRefLocals.GetSize() + be.GetTypeSize(tyIdx)); + } + segArgsToStkPass.SetSize(FindLargestActualArea(structCopySize)); + maxParmStackSize = segArgsToStkPass.GetSize(); + if (Globals::GetInstance()->GetOptimLevel() == 0) { + AssignSpillLocationsToPseudoRegisters(); + } else { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + /* 8-VirtualRegNode occupy byte number */ + aarchCGFunc->SetCatchRegno(cgFunc->NewVReg(kRegTyInt, 8)); + } + segRefLocals.SetSize(RoundUp(segRefLocals.GetSize(), kSizeOfPtr)); + segLocals.SetSize(RoundUp(segLocals.GetSize(), kSizeOfPtr)); +} + +void AArch64MemLayout::LayoutActualParams() { + for (size_t i = 0; i < mirFunction->GetFormalCount(); ++i) { + MIRSymbol *sym = 
mirFunction->GetFormal(i); + if (sym->IsPreg()) { + continue; + } + uint32 stIndex = sym->GetStIndex(); + AArch64SymbolAlloc *symLoc = static_cast(GetSymAllocInfo(stIndex)); + if (symLoc->GetMemSegment() == &GetSegArgsRegPassed()) { /* register */ + /* + * In O0, we store parameters passed via registers into memory. + * So, each of such parameter needs to get assigned storage in stack. + * If a function parameter is never accessed in the function body, + * and if we don't create its memory operand here, its offset gets + * computed when the instruction to store its value into stack + * is generated in the prologue when its memory operand is created. + * But, the parameter would see a different StackFrameSize than + * the parameters that are accessed in the body, because + * the size of the storage for FP/LR is added to the stack frame + * size in between. + * To make offset assignment easier, we create a memory operand + * for each of function parameters in advance. + * This has to be done after all of formal parameters and local + * variables get assigned their respecitve storage, i.e. + * CallFrameSize (discounting callee-saved and FP/LR) is known. + */ + MIRType *ty = mirFunction->GetNthParamType(i); + uint32 ptyIdx = ty->GetTypeIndex(); + static_cast(cgFunc)->GetOrCreateMemOpnd(*sym, 0, be.GetTypeAlign(ptyIdx) * kBitsPerByte); + } + } +} + +void AArch64MemLayout::LayoutStackFrame(int32 &structCopySize, int32 &maxParmStackSize) { + LayoutVarargParams(); + LayoutFormalParams(); + /* + * We do need this as LDR/STR with immediate + * requires imm be aligned at a 8/4-byte boundary, + * and local varirables may need 8-byte alignment. + */ + segArgsRegPassed.SetSize(RoundUp(segArgsRegPassed.GetSize(), kSizeOfPtr)); + /* we do need this as SP has to be aligned at a 16-bytes bounardy */ + segArgsStkPassed.SetSize(RoundUp(segArgsStkPassed.GetSize(), kSizeOfPtr + kSizeOfPtr)); + /* allocate the local variables in the stack */ + std::vector EATempVar; + std::vector retDelays; + LayoutLocalVariales(EATempVar, retDelays); + LayoutEAVariales(EATempVar); + + /* handle ret_ref sym now */ + LayoutReturnRef(retDelays, structCopySize, maxParmStackSize); + + /* + * for the actual arguments that cannot be pass through registers + * need to allocate space for caller-save registers + */ + LayoutActualParams(); + + fixStackSize = RealStackFrameSize(); +} + +void AArch64MemLayout::AssignSpillLocationsToPseudoRegisters() { + MIRPregTable *pregTab = cgFunc->GetFunction().GetPregTab(); + + /* BUG: n_regs include index 0 which is not a valid preg index. */ + size_t nRegs = pregTab->Size(); + spillLocTable.resize(nRegs); + for (size_t i = 1; i < nRegs; ++i) { + PrimType pType = pregTab->PregFromPregIdx(i)->GetPrimType(); + AArch64SymbolAlloc *symLoc = memAllocator->GetMemPool()->New(); + symLoc->SetMemSegment(segLocals); + segLocals.SetSize(RoundUp(segLocals.GetSize(), GetPrimTypeSize(pType))); + symLoc->SetOffset(segLocals.GetSize()); + MIRType *mirTy = GlobalTables::GetTypeTable().GetTypeTable()[pType]; + segLocals.SetSize(segLocals.GetSize() + be.GetTypeSize(mirTy->GetTypeIndex())); + spillLocTable[i] = symLoc; + } + + /* + * Allocate additional stack space for "thrownval". 
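+ * The slot is reserved at the current end of segLocals; the memory operand
+ * built below (stack base register plus an offset derived from the current
+ * segLocals size) is recorded through SetCatchOpnd so the thrown value can be
+ * stored to and reloaded from it.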
+ * segLocals need 8 bit align + */ + segLocals.SetSize(RoundUp(segLocals.GetSize(), kSizeOfPtr)); + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + RegOperand &baseOpnd = aarchCGFunc->GetOrCreateStackBaseRegOperand(); + int32 offset = segLocals.GetSize(); + + AArch64OfstOperand *offsetOpnd = + aarchCGFunc->GetMemoryPool()->New(offset + k16BitSize, k64BitSize); + AArch64MemOperand *throwMem = aarchCGFunc->GetMemoryPool()->New( + AArch64MemOperand::kAddrModeBOi, k64BitSize, baseOpnd, static_cast(nullptr), offsetOpnd, + nullptr); + aarchCGFunc->SetCatchOpnd(*throwMem); + segLocals.SetSize(segLocals.GetSize() + kSizeOfPtr); +} + +SymbolAlloc *AArch64MemLayout::AssignLocationToSpillReg(regno_t vrNum) { + AArch64SymbolAlloc *symLoc = memAllocator->GetMemPool()->New(); + symLoc->SetMemSegment(segSpillReg); + uint32 regSize = cgFunc->GetVRegSize(vrNum); + segSpillReg.SetSize(RoundUp(segSpillReg.GetSize(), regSize)); + symLoc->SetOffset(segSpillReg.GetSize()); + segSpillReg.SetSize(segSpillReg.GetSize() + regSize); + SetSpillRegLocInfo(vrNum, *symLoc); + return symLoc; +} + +int32 AArch64MemLayout::StackFrameSize() { + int32 total = segArgsRegPassed.GetSize() + static_cast(cgFunc)->SizeOfCalleeSaved() + + GetSizeOfRefLocals() + locals().GetSize() + GetSizeOfSpillReg(); + + if (GetSizeOfGRSaveArea() > 0) { + total += RoundUp(GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment); + } + if (GetSizeOfVRSaveArea() > 0) { + total += RoundUp(GetSizeOfVRSaveArea(), kAarch64StackPtrAlignment); + } + + /* + * if the function does not have VLA nor alloca, + * we allocate space for arguments to stack-pass + * in the call frame; otherwise, it has to be allocated for each call and reclaimed afterward. + */ + total += segArgsToStkPass.GetSize(); + return RoundUp(total, kAarch64StackPtrAlignment); +} + +int32 AArch64MemLayout::RealStackFrameSize() { + int32 size = StackFrameSize(); + if (cgFunc->GetCG()->AddStackGuard()) { + size += kAarch64StackPtrAlignment; + } + return size; +} + +int32 AArch64MemLayout::GetRefLocBaseLoc() const { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + int32 beforeSize = GetSizeOfLocals(); + if (aarchCGFunc->UsedStpSubPairForCallFrameAllocation()) { + return beforeSize; + } + return beforeSize + kSizeOfFplr; +} + +int32 AArch64MemLayout::GetGRSaveAreaBaseLoc() { + int32 total = RealStackFrameSize() - + RoundUp(GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment); + total -= SizeOfArgsToStackPass(); + return total; +} + +int32 AArch64MemLayout::GetVRSaveAreaBaseLoc() { + int32 total = RealStackFrameSize() - + RoundUp(GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment) - + RoundUp(GetSizeOfVRSaveArea(), kAarch64StackPtrAlignment); + total -= SizeOfArgsToStackPass(); + return total; +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_offset_adjust.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_offset_adjust.cpp new file mode 100644 index 0000000000000000000000000000000000000000..205d5e75e2de9c5706bb253fc48176d6642e3962 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_offset_adjust.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "riscv64_offset_adjust.h" +#include "riscv64_cgfunc.h" + +namespace maplebe { +void AArch64FPLROffsetAdjustment::Run() { + AdjustmentOffsetForFPLR(); +} + +void AArch64FPLROffsetAdjustment::AdjustmentOffsetForOpnd(Insn &insn, AArch64CGFunc &aarchCGFunc) { + uint32 opndNum = insn.GetOperandSize(); + MemLayout *memLayout = aarchCGFunc.GetMemlayout(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn.GetOperand(i); + if (opnd.IsRegister()) { + auto ®Opnd = static_cast(opnd); + if (regOpnd.IsOfVary()) { + insn.SetOperand(i, aarchCGFunc.GetOrCreateStackBaseRegOperand()); + } + } else if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + if (((memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi) || + (memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOrX)) && + memOpnd.GetBaseRegister() != nullptr && memOpnd.GetBaseRegister()->IsOfVary()) { + memOpnd.SetBaseRegister(static_cast(aarchCGFunc.GetOrCreateStackBaseRegOperand())); + } + if ((memOpnd.GetAddrMode() != AArch64MemOperand::kAddrModeBOi) || !memOpnd.IsIntactIndexed()) { + continue; + } + AArch64OfstOperand *ofstOpnd = memOpnd.GetOffsetImmediate(); + if (ofstOpnd == nullptr) { + continue; + } + if (ofstOpnd->GetVary() == kUnAdjustVary) { + ofstOpnd->AdjustOffset(static_cast(memLayout)->RealStackFrameSize() - + memLayout->SizeOfArgsToStackPass()); + ofstOpnd->SetVary(kAdjustVary); + } + if (ofstOpnd->GetVary() == kAdjustVary) { + if (aarchCGFunc.IsImmediateOffsetOutOfRange(memOpnd, memOpnd.GetSize())) { + AArch64MemOperand &newMemOpnd = aarchCGFunc.SplitOffsetWithAddInstruction( + memOpnd, memOpnd.GetSize(), static_cast(R17), false, &insn); + insn.SetOperand(i, newMemOpnd); + } + } + } else if (opnd.IsIntImmediate()) { + auto &immOpnd = static_cast(opnd); + if (immOpnd.GetVary() == kUnAdjustVary) { + immOpnd.Add(static_cast(memLayout)->RealStackFrameSize() - + memLayout->SizeOfArgsToStackPass()); + } + immOpnd.SetVary(kAdjustVary); + } + } +} + +void AArch64FPLROffsetAdjustment::AdjustmentOffsetForFPLR() { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + FOR_ALL_BB(bb, aarchCGFunc) { + FOR_BB_INSNS(insn, bb) { + if (!insn->IsMachineInstruction()) { + continue; + } + AdjustmentOffsetForOpnd(*insn, *aarchCGFunc); + } + } + +#undef STKLAY_DBUG +#ifdef STKLAY_DBUG + AArch64MemLayout *aarch64memlayout = static_cast(cgFunc->GetMemlayout()); + LogInfo::MapleLogger() << "stkpass: " << aarch64memlayout->GetSegArgsStkpass().size << "\n"; + LogInfo::MapleLogger() << "local: " << aarch64memlayout->GetSizeOfLocals() << "\n"; + LogInfo::MapleLogger() << "ref local: " << aarch64memlayout->GetSizeOfRefLocals() << "\n"; + LogInfo::MapleLogger() << "regpass: " << aarch64memlayout->GetSegArgsRegPassed().size << "\n"; + LogInfo::MapleLogger() << "regspill: " << aarch64memlayout->GetSizeOfSpillReg() << "\n"; + LogInfo::MapleLogger() << "calleesave: " << SizeOfCalleeSaved() << "\n"; + +#endif +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_operand.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_operand.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..49a963a980397262215473b605c416f164c7c18e --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_operand.cpp @@ -0,0 +1,565 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "riscv64_operand.h" +#include +#include +#include "riscv64_abi.h" +#include "riscv64_cgfunc.h" +#include "riscv64_cg.h" + +namespace maplebe { +AArch64RegOperand AArch64RegOperand::zero64(RZR, k64BitSize, kRegTyInt); +AArch64RegOperand AArch64RegOperand::zero32(RZR, k32BitSize, kRegTyInt); + +const char *CondOperand::ccStrs[kCcLast] = { +#define CONDCODE(a) #a, +#include "riscv64_cc.def" +#undef CONDCODE +}; + +bool AArch64RegOperand::IsSaveReg(MIRType &type, BECommon &beCommon) const { + ReturnMechanism retMechanism(type, beCommon); + if (retMechanism.GetRegCount() > 0) { + return GetRegisterNumber() == retMechanism.GetReg0() || GetRegisterNumber() == retMechanism.GetReg1(); + } + return false; +} + +bool AArch64RegOperand::IsSPOrFP() const { + return (IsPhysicalRegister() && (regNO == RSP || regNO == RFP)); +} + +bool AArch64RegOperand::operator==(const AArch64RegOperand &o) const { + regno_t myRn = GetRegisterNumber(); + uint32 mySz = GetSize(); + uint32 myFl = flag; + regno_t otherRn = o.GetRegisterNumber(); + uint32 otherSz = o.GetSize(); + uint32 otherFl = o.flag; + + if (IsPhysicalRegister()) { + return (myRn == otherRn && mySz == otherSz && myFl == otherFl); + } + return (myRn == otherRn && mySz == otherSz); +} + +bool AArch64RegOperand::operator<(const AArch64RegOperand &o) const { + regno_t myRn = GetRegisterNumber(); + uint32 mySz = GetSize(); + uint32 myFl = flag; + regno_t otherRn = o.GetRegisterNumber(); + uint32 otherSz = o.GetSize(); + uint32 otherFl = o.flag; + return myRn < otherRn || (myRn == otherRn && mySz < otherSz) || + (myRn == otherRn && mySz == otherSz && myFl < otherFl); +} + +void AArch64RegOperand::Emit(Emitter &emitter, const OpndProp *opndProp) const { + ASSERT((opndProp == nullptr || (static_cast(opndProp)->IsRegister())), + "operand type doesn't match"); + /* opndProp null means a sub emit, i.e from MemOperand */ + uint8 opndSize = (opndProp != nullptr) ? static_cast(opndProp)->GetSize() : size; + switch (GetRegisterType()) { + case kRegTyInt: { + ASSERT((opndSize == k32BitSize || opndSize == k64BitSize), "illegal register size"); +#ifdef USE_32BIT_REF + bool r32 = (opndSize == k32BitSize) || isRefField; +#else + bool r32 = (opndSize == k32BitSize); +#endif /* USE_32BIT_REF */ + emitter.Emit(AArch64CG::intRegNames[(r32 ? AArch64CG::kR32List : AArch64CG::kR64List)][regNO]); + break; + } + case kRegTyFloat: { + ASSERT((opndSize == k8BitSize || opndSize == k16BitSize || opndSize == k32BitSize || opndSize == k64BitSize), + "illegal register size"); + /* FP reg cannot be reffield. 8~0, 16~1, 32~2, 64~3. 8 is 1000b, has 3 zero. 
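+ * i.e. regSet = ctz(opndSize) - 3 maps operand sizes 8/16/32/64 to list
+ * indexes 0/1/2/3, selecting the register-name list of the matching width.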
*/ + uint32 regSet = __builtin_ctz(opndSize) - 3; + emitter.Emit(AArch64CG::intRegNames[regSet][regNO]); + break; + } + default: + ASSERT(false, "NYI"); + break; + } +} + +void AArch64ImmOperand::Emit(Emitter &emitter, const OpndProp *opndProp) const { + if (!isFmov) { + emitter.Emit((opndProp != nullptr && static_cast(opndProp)->IsLoadLiteral()) ? "=" : "#") + .Emit((size == k64BitSize) ? value : static_cast(static_cast(value))); + return; + } + /* + * compute float value + * use top 4 bits expect MSB of value . then calculate its fourth power + */ + int32 exp = (((static_cast(value) & 0x70) >> 4) ^ 0x4) - 3; + /* use the lower four bits of value in this expression */ + const float mantissa = 1.0 + (static_cast(static_cast(value) & 0xf) / 16.0); + float result = std::pow(2, exp) * mantissa; + + std::stringstream ss; + ss << std::setprecision(10) << result; + std::string res; + ss >> res; + size_t dot = res.find('.'); + if (dot == std::string::npos) { + res += ".0"; + dot = res.find('.'); + CHECK_FATAL(dot != std::string::npos, "cannot find in string"); + } + res.erase(dot, 1); + std::string integer(res, 0, 1); + std::string fraction(res, 1); + while (fraction.size() != 1 && fraction[fraction.size() - 1] == '0') { + fraction.pop_back(); + } + /* fetch the sign bit of this value */ + std::string sign = static_cast(value) & 0x80 ? "-" : ""; + emitter.Emit(sign + integer + "." + fraction + "e+").Emit(dot - 1); +} + +void AArch64OfstOperand::Emit(Emitter &emitter, const OpndProp *opndProp) const { + if (IsImmOffset()) { + emitter.Emit((opndProp != nullptr && static_cast(opndProp)->IsLoadLiteral()) ? "=" : "#") + .Emit((size == k64BitSize) ? GetValue() : static_cast(static_cast(GetValue()))); + return; + } + if (CGOptions::IsPIC() && + (symbol->GetStorageClass() == kScGlobal || symbol->GetStorageClass() == kScExtern)) { + emitter.Emit(":got:" + symbol->GetName()); + } else if (symbol->GetStorageClass() == kScPstatic && symbol->GetSKind() != kStConst && symbol->IsLocal()) { + emitter.Emit(symbol->GetName() + std::to_string(emitter.GetCG()->GetMIRModule()->CurFunction()->GetPuidx())); + } else { + emitter.Emit(symbol->GetName()); + } + if (GetValue() != 0) { + emitter.Emit("+" + std::to_string(GetValue())); + } +} + +bool StImmOperand::Less(const Operand &right) const{ + if (&right == this) { + return false; + } + + /* For different type. 
*/ + if (GetKind() != right.GetKind()) { + return GetKind() < right.GetKind(); + } + + const StImmOperand *rightOpnd = static_cast(&right); + if (symbol != rightOpnd->symbol) { + return symbol < rightOpnd->symbol; + } + if (offset != rightOpnd->offset) { + return offset < rightOpnd->offset; + } + return relocs < rightOpnd->relocs; +} + +void StImmOperand::Emit(Emitter &emitter, const OpndProp *opndProp) const { + CHECK_FATAL(opndProp != nullptr, "opndProp is nullptr in StImmOperand::Emit"); + if (static_cast(opndProp)->IsLiteralLow12()) { + emitter.Emit("#:lo12:" + GetName()); + if (offset != 0) { + emitter.Emit("+" + std::to_string(offset)); + } + return; + } + if (CGOptions::IsPIC() && (symbol->GetStorageClass() == kScGlobal || symbol->GetStorageClass() == kScExtern)) { + emitter.Emit(":got:" + GetName()); + } else if (symbol->GetStorageClass() == kScPstatic && symbol->GetSKind() != kStConst && symbol->IsLocal()) { + emitter.Emit(symbol->GetName() + std::to_string(emitter.GetCG()->GetMIRModule()->CurFunction()->GetPuidx())); + } else { + emitter.Emit(GetName()); + } + if (offset != 0) { + emitter.Emit("+" + std::to_string(offset)); + } +} + +const int32 AArch64MemOperand::kMaxPimms[4] = { AArch64MemOperand::kMaxPimm8, AArch64MemOperand::kMaxPimm16, + AArch64MemOperand::kMaxPimm32, AArch64MemOperand::kMaxPimm64 }; + +Operand *AArch64MemOperand::GetOffset() const { + switch (addrMode) { + case kAddrModeBOi: + return GetOffsetOperand(); + case kAddrModeBOrX: + return GetOffsetRegister(); + case kAddrModeLiteral: + break; + case kAddrModeLo12Li: + break; + default: + ASSERT(false, "error memoperand dump"); + break; + } + return nullptr; +} + +void AArch64MemOperand::Emit(Emitter &emitter, const OpndProp *opndProp) const { + AArch64MemOperand::AArch64AddressingMode addressMode = GetAddrMode(); +#if DEBUG + const AArch64MD *md = &AArch64CG::kMd[emitter.GetCurrentMOP()]; + bool isLDSTpair = md->IsLoadStorePair(); + ASSERT(md->Is64Bit() || md->GetOperandSize() <= k32BitSize, "unexpected opnd size"); +#endif + if (addressMode == AArch64MemOperand::kAddrModeBOi) { + emitter.Emit("["); + auto *baseReg = static_cast(GetBaseRegister()); + ASSERT(baseReg != nullptr, "expect an AArch64RegOperand here"); + if (CGOptions::IsPIC() && (baseReg->GetSize() != k64BitSize)) { + baseReg->SetSize(k64BitSize); + } + baseReg->Emit(emitter, nullptr); + AArch64OfstOperand *offset = GetOffsetImmediate(); + if (offset != nullptr) { +#ifndef USE_32BIT_REF /* can be load a ref here */ + ASSERT(!IsOffsetMisaligned(md->GetOperandSize()), "should not be OffsetMisaligned"); +#endif /* USE_32BIT_REF */ + if (IsPostIndexed()) { + ASSERT(!IsSIMMOffsetOutOfRange(offset->GetOffsetValue(), md->Is64Bit(), isLDSTpair), + "should not be SIMMOffsetOutOfRange"); + emitter.Emit("]"); + if (!offset->IsZero()) { + emitter.Emit(", "); + offset->Emit(emitter, nullptr); + } + } else if (IsPreIndexed()) { + ASSERT(!IsSIMMOffsetOutOfRange(offset->GetOffsetValue(), md->Is64Bit(), isLDSTpair), + "should not be SIMMOffsetOutOfRange"); + if (!offset->IsZero()) { + emitter.Emit(","); + offset->Emit(emitter, nullptr); + } + emitter.Emit("]!"); + } else { + if (CGOptions::IsPIC() && (offset->IsSymOffset() || offset->IsSymAndImmOffset()) && + (offset->GetSymbol()->GetStorageClass() == kScGlobal || + offset->GetSymbol()->GetStorageClass() == kScExtern)) { + emitter.Emit(",#:got_lo12:"); + emitter.Emit(offset->GetSymbolName()); + } else { + ASSERT(!IsPIMMOffsetOutOfRange(offset->GetOffsetValue(), size), "should not be PIMMOffsetOutOfRange"); + if 
(!offset->IsZero()) { + emitter.Emit(","); + offset->Emit(emitter, nullptr); + } + } + emitter.Emit("]"); + } + } else { + emitter.Emit("]"); + } + } else if (addressMode == AArch64MemOperand::kAddrModeBOrX) { + /* + * Base plus offset | [base{, #imm}] [base, Xm{, LSL #imm}] [base, Wm, (S|U)XTW {#imm}] + * offset_opnds=nullptr + * offset_opnds=64 offset_opnds=32 + * imm=0 or 3 imm=0 or 2, s/u + */ + emitter.Emit("["); + GetBaseRegister()->Emit(emitter, nullptr); + emitter.Emit(","); + GetOffsetRegister()->Emit(emitter, nullptr); + if (ShouldEmitExtend()) { + emitter.Emit(","); + /* extend, #0, of #3/#2 */ + emitter.Emit(GetExtendAsString()); + if (GetExtendAsString() == "LSL" || ShiftAmount() != 0) { + emitter.Emit(" #"); + emitter.Emit(ShiftAmount()); + } + } + emitter.Emit("]"); + } else if (addressMode == AArch64MemOperand::kAddrModeLiteral) { + auto *prop = static_cast(opndProp); + CHECK_FATAL(prop != nullptr, "prop is nullptr in AArch64MemOperand::Emit"); + if (prop->IsMemLow12()) { + emitter.Emit("#:lo12:"); + } + emitter.Emit(GetSymbol()->GetName()); + } else if (addressMode == AArch64MemOperand::kAddrModeLo12Li) { + emitter.Emit("["); + GetBaseRegister()->Emit(emitter, nullptr); + + AArch64OfstOperand *offset = GetOffsetImmediate(); + ASSERT(offset != nullptr, "nullptr check"); + + emitter.Emit(", #:lo12:"); + if (GetSymbol()->GetStorageClass() == kScPstatic && GetSymbol()->IsLocal()) { + PUIdx pIdx = emitter.GetCG()->GetMIRModule()->CurFunction()->GetPuidx(); + emitter.Emit(GetSymbolName() + std::to_string(pIdx)); + } else { + emitter.Emit(GetSymbolName()); + } + if (!offset->IsZero()) { + emitter.Emit("+"); + emitter.Emit(std::to_string(offset->GetOffsetValue())); + } + emitter.Emit("]"); + } else { + ASSERT(false, "nyi"); + } +} + +void AArch64MemOperand::Dump() const { + LogInfo::MapleLogger() << "Mem:"; + switch (addrMode) { + case kAddrModeBOi: { + LogInfo::MapleLogger() << "base:"; + GetBaseRegister()->Dump(); + LogInfo::MapleLogger() << "offset:"; + GetOffsetOperand()->Dump(); + switch (idxOpt) { + case kIntact: + LogInfo::MapleLogger() << " intact"; + break; + case kPreIndex: + LogInfo::MapleLogger() << " pre-index"; + break; + case kPostIndex: + LogInfo::MapleLogger() << " post-index"; + break; + default: + break; + } + break; + } + case kAddrModeBOrX: { + LogInfo::MapleLogger() << "base:"; + GetBaseRegister()->Dump(); + LogInfo::MapleLogger() << "offset:"; + GetOffsetRegister()->Dump(); + LogInfo::MapleLogger() << " " << GetExtendAsString(); + LogInfo::MapleLogger() << " shift: " << ShiftAmount(); + break; + } + case kAddrModeLiteral: + LogInfo::MapleLogger() << "literal: " << GetSymbolName(); + break; + case kAddrModeLo12Li: { + LogInfo::MapleLogger() << "base:"; + GetBaseRegister()->Dump(); + LogInfo::MapleLogger() << "offset:"; + AArch64OfstOperand *offOpnd = GetOffsetImmediate(); + LogInfo::MapleLogger() << "#:lo12:"; + if (GetSymbol()->GetStorageClass() == kScPstatic && GetSymbol()->IsLocal()) { + PUIdx pIdx = CG::GetCurCGFunc()->GetMirModule().CurFunction()->GetPuidx(); + LogInfo::MapleLogger() << GetSymbolName() << std::to_string(pIdx); + } else { + LogInfo::MapleLogger() << GetSymbolName(); + } + LogInfo::MapleLogger() << "+" << std::to_string(offOpnd->GetOffsetValue()); + break; + } + default: + ASSERT(false, "error memoperand dump"); + break; + } +} + +bool AArch64MemOperand::Equals(Operand &operand) const { + if (!operand.IsMemoryAccessOperand()) { + return false; + } + return Equals(static_cast(operand)); +} + +bool AArch64MemOperand::Equals(AArch64MemOperand 
&op) const { + if (&op == this) { + return true; + } + + if (addrMode == op.GetAddrMode()) { + switch (addrMode) { + case kAddrModeBOi: + return (GetBaseRegister()->Equals(*op.GetBaseRegister()) && + GetOffsetImmediate()->Equals(*op.GetOffsetImmediate())); + case kAddrModeBOrX: + return (GetBaseRegister()->Equals(*op.GetBaseRegister()) && + GetOffsetRegister()->Equals(*op.GetOffsetRegister()) && + GetExtendAsString() == op.GetExtendAsString() && + ShiftAmount() == op.ShiftAmount()); + case kAddrModeLiteral: + return GetSymbolName() == op.GetSymbolName(); + case kAddrModeLo12Li: + return (GetBaseRegister()->Equals(*op.GetBaseRegister()) && + GetSymbolName() == op.GetSymbolName() && + GetOffsetImmediate()->Equals(*op.GetOffsetImmediate())); + default: + ASSERT(false, "error memoperand"); + break; + } + } + return false; +} + +bool AArch64MemOperand::Less(const Operand &right) const { + if (&right == this) { + return false; + } + + /* For different type. */ + if (GetKind() != right.GetKind()) { + return GetKind() < right.GetKind(); + } + + const AArch64MemOperand *rightOpnd = static_cast(&right); + if (addrMode != rightOpnd->addrMode) { + return addrMode < rightOpnd->addrMode; + } + + switch (addrMode) { + case kAddrModeBOi: { + ASSERT(idxOpt == kIntact, "Should not compare pre/post index addressing."); + + RegOperand *baseReg = GetBaseRegister(); + RegOperand *rbaseReg = rightOpnd->GetBaseRegister(); + int32 nRet = baseReg->RegCompare(*rbaseReg); + if (nRet == 0) { + Operand *ofstOpnd = GetOffsetOperand(); + const Operand *rofstOpnd = rightOpnd->GetOffsetOperand(); + return ofstOpnd->Less(*rofstOpnd); + } + return nRet < 0; + } + case kAddrModeBOrX: { + if (noExtend != rightOpnd->noExtend) { + return noExtend; + } + if (!noExtend && extend != rightOpnd->extend) { + return extend < rightOpnd->extend; + } + RegOperand *indexReg = GetIndexRegister(); + const RegOperand *rindexReg = rightOpnd->GetIndexRegister(); + return indexReg->Less(*rindexReg); + } + case kAddrModeLiteral: { + return static_cast(GetSymbol()) < static_cast(rightOpnd->GetSymbol()); + } + case kAddrModeLo12Li: { + if (GetSymbol() != rightOpnd->GetSymbol()) { + return static_cast(GetSymbol()) < static_cast(rightOpnd->GetSymbol()); + } + Operand *ofstOpnd = GetOffsetOperand(); + const Operand *rofstOpnd = rightOpnd->GetOffsetOperand(); + return ofstOpnd->Less(*rofstOpnd); + } + default: + ASSERT(false, "Internal error."); + return false; + } +} + +bool AArch64MemOperand::NoAlias(AArch64MemOperand &rightOpnd) const { + if (addrMode == kAddrModeBOi && rightOpnd.addrMode == kAddrModeBOi && idxOpt == kIntact && + rightOpnd.idxOpt == kIntact) { + RegOperand *baseReg = GetBaseRegister(); + RegOperand *rbaseReg = rightOpnd.GetBaseRegister(); + + if (baseReg->GetRegisterNumber() == RFP || rbaseReg->GetRegisterNumber() == RFP) { + Operand *ofstOpnd = GetOffsetOperand(); + Operand *rofstOpnd = rightOpnd.GetOffsetOperand(); + + ASSERT(ofstOpnd != nullptr, "offset operand should not be null."); + ASSERT(rofstOpnd != nullptr, "offset operand should not be null."); + OfstOperand *ofst = static_cast(ofstOpnd); + OfstOperand *rofst = static_cast(rofstOpnd); + ASSERT(ofst != nullptr, "CG internal error, invalid type."); + ASSERT(rofst != nullptr, "CG internal error, invalid type."); + + return (!ofst->ValueEquals(*rofst)); + } + } + + return false; +} + +/* sort the register operand according to their number */ +void AArch64ListOperand::Emit(Emitter &emitter, const OpndProp *opndProp) const { + (void)opndProp; + size_t nLeft = opndList.size(); + 
if (nLeft == 0) { + return; + } + + for (auto it = opndList.begin(); it != opndList.end(); ++it) { + (*it)->Emit(emitter, nullptr); + if (--nLeft >= 1) { + emitter.Emit(", "); + } + } +} + +bool CondOperand::Less(const Operand &right) const { + if (&right == this) { + return false; + } + + /* For different type. */ + if (GetKind() != right.GetKind()) { + return GetKind() < right.GetKind(); + } + + const CondOperand *rightOpnd = static_cast(&right); + + /* The same type. */ + if (cc == CC_AL || rightOpnd->cc == CC_AL) { + return false; + } + return cc < rightOpnd->cc; +} + +bool ExtendShiftOperand::Less(const Operand &right) const { + if (&right == this) { + return false; + } + /* For different type. */ + if (GetKind() != right.GetKind()) { + return GetKind() < right.GetKind(); + } + + const ExtendShiftOperand *rightOpnd = static_cast(&right); + + /* The same type. */ + if (extendOp != rightOpnd->extendOp) { + return extendOp < rightOpnd->extendOp; + } + return shiftAmount < rightOpnd->shiftAmount; +} + +bool BitShiftOperand::Less(const Operand &right) const { + if (&right == this) { + return false; + } + + /* For different type. */ + if (GetKind() != right.GetKind()) { + return GetKind() < right.GetKind(); + } + + const BitShiftOperand *rightOpnd = static_cast(&right); + + /* The same type. */ + if (shiftOp != rightOpnd->shiftOp) { + return shiftOp < rightOpnd->shiftOp; + } + return shiftAmount < rightOpnd->shiftAmount; +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_opnd.def b/src/mapleall/maple_be/src/cg/riscv64/riscv64_opnd.def new file mode 100644 index 0000000000000000000000000000000000000000..fdebd29b8475376546e8279d3b73c519d59c4f5b --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_opnd.def @@ -0,0 +1,156 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan PSL v1. + * You can use this software according to the terms and conditions of the Mulan PSL v1. + * You may obtain a copy of Mulan PSL v1 at: + * + * http://license.coscl.org.cn/MulanPSL + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v1 for more details. 
+ */ +AArch64OpndProp mopdInt32RegSrc = {Operand::kOpdRegister, {kRegTyInt, kAllRegNum, kRegPropUse }, 32}; +AArch64OpndProp mopdInt32RegDest = {Operand::kOpdRegister, {kRegTyInt, kAllRegNum, kRegPropDef}, 32}; +AArch64OpndProp mopdInt32RegDestSrc = {Operand::kOpdRegister, {kRegTyInt, kAllRegNum, kRegPropDef | kRegPropUse}, 32}; +AArch64OpndProp mopdInt64RegSrc = {Operand::kOpdRegister, {kRegTyInt, kAllRegNum, kRegPropUse}, 64}; +AArch64OpndProp mopdInt64RegDest = {Operand::kOpdRegister, {kRegTyInt, kAllRegNum, kRegPropDef}, 64}; +AArch64OpndProp mopdInt64RegDestSrc = {Operand::kOpdRegister, {kRegTyInt, kAllRegNum, kRegPropDef | kRegPropUse}, 64}; +AArch64OpndProp mopdF8RegSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropUse}, 8}; +AArch64OpndProp mopdF8RegDest = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef}, 8}; +AArch64OpndProp mopdF16RegSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropUse}, 16}; +AArch64OpndProp mopdF16RegDest = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef}, 16}; +AArch64OpndProp mopdF32RegSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropUse}, 32}; +AArch64OpndProp mopdF32RegDest = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef}, 32}; +AArch64OpndProp mopdF32RegDestSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef | kRegPropUse}, 32}; +AArch64OpndProp mopdF64RegSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropUse}, 64}; +AArch64OpndProp mopdF64RegDest = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef}, 64}; +AArch64OpndProp mopdF64RegDestSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef | kRegPropUse}, 64}; +AArch64OpndProp mopdIntImm4Src = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse}, 4}; +AArch64OpndProp mopdIntImm5Src = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse}, 5}; +AArch64OpndProp mopdIntImm6Src = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse}, 6}; +AArch64OpndProp mopdIntImm8Src = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse}, 8}; +AArch64OpndProp mopdIntImm12Src = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse}, 12}; +AArch64OpndProp mopdIntImm13Src = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse}, 13}; +AArch64OpndProp mopdIntImm16Src = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse}, 16}; +AArch64OpndProp mopdIntImm24Src = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse}, 24}; +AArch64OpndProp mopdIntImm32Src = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse}, 32}; +AArch64OpndProp mopdIntImm32Literal = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse | kLoadLiteral}, 32}; +AArch64OpndProp mopdIntImm64Src = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse}, 64}; +AArch64OpndProp mopdIntImm64Literal = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse | kLoadLiteral}, 64}; +AArch64OpndProp mopdFpzeroImm8Src = {Operand::kOpdFPZeroImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse}, 8}; +AArch64OpndProp mopdMem8Src = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse}, 8}; +AArch64OpndProp mopdMem16Src = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse}, 16}; +AArch64OpndProp mopdMem32Src = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse}, 32}; +AArch64OpndProp mopdMem32SrcH = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse | kMemLow12}, 16}; +AArch64OpndProp mopdMem32SrcL = 
{Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse | kMemLow12}, 16}; +AArch64OpndProp mopdMem64Src = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse}, 64}; +AArch64OpndProp mopdMem64SrcL = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse | kMemLow12}, 12}; + +AArch64OpndProp mopdMem8Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropDef }, 8}; +AArch64OpndProp mopdMem16Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropDef }, 16}; +AArch64OpndProp mopdMem32Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropDef }, 32}; +AArch64OpndProp mopdMem64Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropDef }, 64}; + +AArch64OpndProp mopdLbl64Src = {Operand::kOpdBBAddress, {kRegTyUndef,kAllRegNum, kRegPropUse}, 64}; +AArch64OpndProp mopdLiteralSrc = {Operand::kOpdStImmediate, {kRegTyUndef,kAllRegNum, kRegPropUse}, 64}; +AArch64OpndProp mopdLiteralL12Src = {Operand::kOpdStImmediate, {kRegTyUndef, kAllRegNum, kLiteralLow12}, 12}; +AArch64OpndProp mopdListSrc = {Operand::kOpdList, {kRegTyUndef, kAllRegNum, kRegPropUse}, 1}; +AArch64OpndProp mopdCcRegSrc = {Operand::kOpdRegister, {kRegTyCc, kAllRegNum, kRegPropUse}, 1}; +AArch64OpndProp mopdCcRegDest = {Operand::kOpdRegister, {kRegTyCc, kAllRegNum, kRegPropDef}, 1}; +AArch64OpndProp mopdCcRegDestSrc = {Operand::kOpdRegister, {kRegTyCc, kAllRegNum, kRegPropDef | kRegPropUse}, 1}; +AArch64OpndProp mopdSpRegDest = {Operand::kOpdRegister, {kRegTyInt, RSP, kRegPropDef}, 32}; +AArch64OpndProp mopdMem32SrcPre = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse | kPreInc}, 32}; +AArch64OpndProp mopdMem32SrcPost = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse | kPostInc}, 32}; +AArch64OpndProp mopdMem64SrcPre = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse | kPreInc}, 64}; +AArch64OpndProp mopdMem64SrcPost = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse | kPostInc}, 64}; +AArch64OpndProp mopdMem32LiteralSrc = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse}, 32}; +AArch64OpndProp mopdMem64LiteralSrc = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse}, 64}; + +AArch64OpndProp mopdCondSrc = {Operand::kOpdCond, {kRegTyCc, kAllRegNum, kRegPropUse}, 4}; + +AArch64OpndProp mopdBitshift32Src = {Operand::kOpdShift, {kRegTyUndef, kAllRegNum, kRegPropUse}, 5}; +AArch64OpndProp mopdBitshift64Src = {Operand::kOpdShift, {kRegTyUndef, kAllRegNum, kRegPropUse}, 6}; +AArch64OpndProp mopdExtendshift64Src = {Operand::kOpdExtend, {kRegTyUndef, kAllRegNum, kRegPropUse}, 3}; +AArch64OpndProp mopdLsl4Src = {Operand::kOpdShift, {kRegTyUndef, kAllRegNum, kRegPropUse}, 4}; +AArch64OpndProp mopdLsl6Src = {Operand::kOpdShift, {kRegTyUndef, kAllRegNum, kRegPropUse}, 6}; +AArch64OpndProp mopdLsl12Src = {Operand::kOpdShift, {kRegTyUndef, kAllRegNum, kRegPropUse}, 12}; + +AArch64OpndProp mopdString = {Operand::kOpdString, {kRegTyUndef, kAllRegNum, kRegPropUse}, 0}; + +// physical register + +AArch64OpndProp *MOPDReg = &mopdInt32RegSrc; +// in mopdReg32IS, Reg means register, 32 means 32-bits, I means integer(F means float), +// S means source, D means dest, H means high harf bits, L means low harf bits +AArch64OpndProp *mopdReg32IS = &mopdInt32RegSrc; +AArch64OpndProp *mopdReg32ID = &mopdInt32RegDest; +AArch64OpndProp *mopdReg32IDS = &mopdInt32RegDestSrc; +AArch64OpndProp *mopdReg64IS = &mopdInt64RegSrc; +AArch64OpndProp *mopdReg64ID = &mopdInt64RegDest; +AArch64OpndProp *mopdReg64IDS = &mopdInt64RegDestSrc; +AArch64OpndProp *mopdReg8FS = &mopdF8RegSrc; +AArch64OpndProp 
*mopdReg8FD = &mopdF8RegDest; +AArch64OpndProp *mopdReg16FS = &mopdF16RegSrc; +AArch64OpndProp *mopdReg16FD = &mopdF16RegDest; +AArch64OpndProp *mopdReg32FS = &mopdF32RegSrc; +AArch64OpndProp *mopdReg32FD = &mopdF32RegDest; +AArch64OpndProp *mopdReg32FDS = &mopdF32RegDestSrc; +AArch64OpndProp *mopdReg64FS = &mopdF64RegSrc; +AArch64OpndProp *mopdReg64FD = &mopdF64RegDest; +AArch64OpndProp *mopdReg64FDS = &mopdF64RegDestSrc; +AArch64OpndProp *mopdMem = &mopdMem32Src; +AArch64OpndProp *mopdMem8S = &mopdMem8Src; +AArch64OpndProp *mopdMem16S = &mopdMem16Src; +AArch64OpndProp *mopdMem32S = &mopdMem32Src; +AArch64OpndProp *mopdMem32SL = &mopdMem32SrcL; +AArch64OpndProp *mopdMem32SH = &mopdMem32SrcH; +AArch64OpndProp *mopdMem64S = &mopdMem64Src; +AArch64OpndProp *mopdMem64SL = &mopdMem64SrcL; +AArch64OpndProp *mopdMem8D = &mopdMem8Dest; +AArch64OpndProp *mopdMem16D = &mopdMem16Dest; +AArch64OpndProp *mopdMem32D = &mopdMem32Dest; +AArch64OpndProp *mopdMem64D = &mopdMem64Dest; +AArch64OpndProp *mopdMem32SPRE = &mopdMem32SrcPre; +AArch64OpndProp *mopdMem32SPOST = &mopdMem32SrcPost; +AArch64OpndProp *mopdMem64SPRE = &mopdMem64SrcPre; +AArch64OpndProp *mopdMem64SPOST = &mopdMem64SrcPost; +AArch64OpndProp *mopdMem32LiteralS = &mopdMem32LiteralSrc; +AArch64OpndProp *mopdMem64LiteralS = &mopdMem64LiteralSrc; +AArch64OpndProp *mopdImm4 = &mopdIntImm4Src; +AArch64OpndProp *mopdImm5 = &mopdIntImm5Src; +AArch64OpndProp *mopdImm6 = &mopdIntImm6Src; +AArch64OpndProp *mopdImm8 = &mopdIntImm8Src; +AArch64OpndProp *mopdImm12 = &mopdIntImm12Src; +AArch64OpndProp *mopdImm13 = &mopdIntImm13Src; +AArch64OpndProp *mopdImm16 = &mopdIntImm16Src; +AArch64OpndProp *mopdImm24 = &mopdIntImm24Src; +AArch64OpndProp *mopdImm32 = &mopdIntImm32Src; +AArch64OpndProp* mopdImm32LI = &mopdIntImm32Literal; +AArch64OpndProp *mopdImm64 = &mopdIntImm64Src; +AArch64OpndProp* mopdImm64LI = &mopdIntImm64Literal; +AArch64OpndProp *mopdFPZeroImm8 = &mopdFpzeroImm8Src; +AArch64OpndProp *mopdFuncName = &mopdLbl64Src; +AArch64OpndProp *mopdLabel = &mopdLbl64Src; +AArch64OpndProp *mopdLiteral = &mopdLiteralSrc; +AArch64OpndProp *mopdLiteralL12 = &mopdLiteralL12Src; + +AArch64OpndProp *mopdRegCCS = &mopdCcRegSrc; +AArch64OpndProp *mopdRegCCD = &mopdCcRegDest; +AArch64OpndProp *mopdRegCCDS = &mopdCcRegDestSrc; + +AArch64OpndProp *mopdCond = &mopdCondSrc; + +AArch64OpndProp *mopdBitShift32 = &mopdBitshift32Src; +AArch64OpndProp *mopdBitShift64 = &mopdBitshift64Src; +AArch64OpndProp *mopdExtendShift64 = &mopdExtendshift64Src; +AArch64OpndProp *mopdLSL4 = &mopdLsl4Src; +AArch64OpndProp *mopdLSL6 = &mopdLsl6Src; +AArch64OpndProp *mopdLSL12 = &mopdLsl12Src; + +AArch64OpndProp *mopdRSPD = &mopdSpRegDest; +AArch64OpndProp *mopdLISTS = &mopdListSrc; +AArch64OpndProp *mopdSTRING = &mopdString; +AArch64OpndProp *mopdUndef = nullptr; diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_optimize_common.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_optimize_common.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fb6f6a8a787270f51e30e447409135c73fbfd469 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_optimize_common.cpp @@ -0,0 +1,202 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "riscv64_optimize_common.h" +#include "riscv64_isa.h" +#include "riscv64_cgfunc.h" +#include "cgbb.h" + +namespace maplebe { +namespace { +constexpr int kTbxTargetIdx = 2; +}; + +MOperator AArch64InsnVisitor::FlipConditionOp(MOperator originalOp, int &targetIdx) { + targetIdx = 1; + switch (originalOp) { + case AArch64MOP_t::MOP_beq: + return AArch64MOP_t::MOP_bne; + case AArch64MOP_t::MOP_bge: + return AArch64MOP_t::MOP_blt; + case AArch64MOP_t::MOP_bgt: + return AArch64MOP_t::MOP_ble; + case AArch64MOP_t::MOP_bhi: + return AArch64MOP_t::MOP_bls; + case AArch64MOP_t::MOP_bhs: + return AArch64MOP_t::MOP_blo; + case AArch64MOP_t::MOP_ble: + return AArch64MOP_t::MOP_bgt; + case AArch64MOP_t::MOP_blo: + return AArch64MOP_t::MOP_bhs; + case AArch64MOP_t::MOP_bls: + return AArch64MOP_t::MOP_bhi; + case AArch64MOP_t::MOP_blt: + return AArch64MOP_t::MOP_bge; + case AArch64MOP_t::MOP_bne: + return AArch64MOP_t::MOP_beq; + case AArch64MOP_t::MOP_xcbnz: + return AArch64MOP_t::MOP_xcbz; + case AArch64MOP_t::MOP_wcbnz: + return AArch64MOP_t::MOP_wcbz; + case AArch64MOP_t::MOP_xcbz: + return AArch64MOP_t::MOP_xcbnz; + case AArch64MOP_t::MOP_wcbz: + return AArch64MOP_t::MOP_wcbnz; + case AArch64MOP_t::MOP_wtbnz: + targetIdx = kTbxTargetIdx; + return AArch64MOP_t::MOP_wtbz; + case AArch64MOP_t::MOP_wtbz: + targetIdx = kTbxTargetIdx; + return AArch64MOP_t::MOP_wtbnz; + case AArch64MOP_t::MOP_xtbnz: + targetIdx = kTbxTargetIdx; + return AArch64MOP_t::MOP_xtbz; + case AArch64MOP_t::MOP_xtbz: + targetIdx = kTbxTargetIdx; + return AArch64MOP_t::MOP_xtbnz; + default: + break; + } + return AArch64MOP_t::MOP_undef; +} + +void AArch64InsnVisitor::ModifyJumpTarget(Operand &targetOperand, BB &bb) { + bb.GetLastInsn()->SetOperand(GetJumpTargetIdx(*(bb.GetLastInsn())), targetOperand); +} + +void AArch64InsnVisitor::ModifyJumpTarget(maple::LabelIdx targetLabel, BB &bb) { + ModifyJumpTarget(static_cast(GetCGFunc())->GetOrCreateLabelOperand(targetLabel), bb); +} + +void AArch64InsnVisitor::ModifyJumpTarget(BB &newTarget, BB &bb) { + ModifyJumpTarget(newTarget.GetLastInsn()->GetOperand(GetJumpTargetIdx(*(newTarget.GetLastInsn()))), bb); +} + +Insn *AArch64InsnVisitor::CloneInsn(Insn &originalInsn) { + MemPool *memPool = const_cast(CG::GetCurCGFunc()->GetMemoryPool()); + if (originalInsn.IsTargetInsn()) { + return memPool->Clone(*static_cast(&originalInsn)); + } else if (originalInsn.IsCfiInsn()) { + return memPool->Clone(*static_cast(&originalInsn)); + } + CHECK_FATAL(false, "Cannot clone"); + return nullptr; +} + +/* + * Precondition: The given insn is a jump instruction. + * Get the jump target label operand index from the given instruction. + * Note: MOP_xbr is a jump instruction, but the target is unknown at compile time, + * because a register instead of label. So we don't take it as a branching instruction. 
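+ * As a concrete illustration of the indices returned below (mnemonics are ordinary AArch64
+ * syntax, not taken from this file): for an unconditional `b .L10` the label is operand 0,
+ * for `cbz w0, .L10` it is operand 1 (for the b.cond forms operand 0 is presumably the
+ * condition-flags register, so the label is again operand 1), and for `tbz w0, #3, .L10`
+ * it is operand 2 (kTbxTargetIdx).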
+ */ +int AArch64InsnVisitor::GetJumpTargetIdx(const Insn &insn) const { + MOperator mOp = insn.GetMachineOpcode(); + switch (mOp) { + /* unconditional jump */ + case MOP_xuncond: { + return 0; + } + /* conditional jump */ + case MOP_bmi: + case MOP_bvc: + case MOP_bls: + case MOP_blt: + case MOP_ble: + case MOP_blo: + case MOP_beq: + case MOP_bpl: + case MOP_bhs: + case MOP_bvs: + case MOP_bhi: + case MOP_bgt: + case MOP_bge: + case MOP_bne: + case MOP_wcbz: + case MOP_xcbz: + case MOP_wcbnz: + case MOP_xcbnz: { + return 1; + } + case MOP_wtbz: + case MOP_xtbz: + case MOP_wtbnz: + case MOP_xtbnz: { + return kTbxTargetIdx; + } + default: + CHECK_FATAL(false, "Not a jump insn"); + } + return 0; +} + +/* + * Precondition: The given insn is a jump instruction. + * Get the jump target label from the given instruction. + * Note: MOP_xbr is a branching instruction, but the target is unknown at compile time, + * because a register instead of label. So we don't take it as a branching instruction. + */ +LabelIdx AArch64InsnVisitor::GetJumpLabel(const Insn &insn) const { + int operandIdx = GetJumpTargetIdx(insn); + if (insn.GetOperand(operandIdx).IsLabelOpnd()) { + return static_cast(insn.GetOperand(operandIdx)).GetLabelIndex(); + } + ASSERT(false, "Operand is not label"); + return 0; +} + +bool AArch64InsnVisitor::IsCompareInsn(const Insn &insn) const { + switch (insn.GetMachineOpcode()) { + case MOP_wcmpri: + case MOP_wcmprr: + case MOP_xcmpri: + case MOP_xcmprr: + case MOP_hcmperi: + case MOP_hcmperr: + case MOP_scmperi: + case MOP_scmperr: + case MOP_dcmperi: + case MOP_dcmperr: + case MOP_hcmpqri: + case MOP_hcmpqrr: + case MOP_scmpqri: + case MOP_scmpqrr: + case MOP_dcmpqri: + case MOP_dcmpqrr: + case MOP_wcmnri: + case MOP_wcmnrr: + case MOP_xcmnri: + case MOP_xcmnrr: + return true; + default: + return false; + } +} + +bool AArch64InsnVisitor::IsCompareAndBranchInsn(const Insn &insn) const { + switch (insn.GetMachineOpcode()) { + case MOP_wcbnz: + case MOP_xcbnz: + case MOP_wcbz: + case MOP_xcbz: + return true; + default: + return false; + } +} + +RegOperand *AArch64InsnVisitor::CreateVregFromReg(const RegOperand &pReg) { + return &static_cast(GetCGFunc())->CreateRegisterOperandOfType( + pReg.GetRegisterType(), pReg.GetSize() / k8BitSize); +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_peep.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_peep.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b8400e7d6aa34ffa5ec863a0a88272b5cf1a89bc --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_peep.cpp @@ -0,0 +1,2711 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#include "riscv64_peep.h" +#include "cg.h" +#include "mpl_logging.h" +#include "common_utils.h" + +namespace maplebe { +#define JAVALANG (cgFunc.GetMirModule().IsJavaModule()) +namespace { +const std::string kMccLoadRef = "MCC_LoadRefField"; +const std::string kMccLoadRefV = "MCC_LoadVolatileField"; +const std::string kMccLoadRefS = "MCC_LoadRefStatic"; +const std::string kMccLoadRefVS = "MCC_LoadVolatileStaticField"; +const std::string kMccDummy = "MCC_Dummy"; + +const std::string GetReadBarrierName(const Insn &insn) { + constexpr int32 totalBarrierNamesNum = 5; + std::array barrierNames = { + kMccLoadRef, kMccLoadRefV, kMccLoadRefS, kMccLoadRefVS, kMccDummy + }; + if (insn.GetMachineOpcode() == MOP_xbl || + insn.GetMachineOpcode() == MOP_tail_call_opt_xbl) { + auto &op = static_cast(insn.GetOperand(kInsnFirstOpnd)); + const std::string &funcName = op.GetName(); + for (const std::string &singleBarrierName : barrierNames) { + if (funcName == singleBarrierName) { + return singleBarrierName; + } + } + } + return ""; +} + +MOperator GetLoadOperator(uint32 refSize, bool isVolatile) { + if (refSize == k32BitSize) { + return isVolatile ? MOP_wldar : MOP_wldr; + } + return isVolatile ? MOP_xldar : MOP_xldr; +} +} + +void AArch64PeepHole::InitOpts() { + optimizations.resize(kPeepholeOptsNum); + optimizations[kRemoveIdenticalLoadAndStoreOpt] = optOwnMemPool->New(cgFunc); + optimizations[kRemoveMovingtoSameRegOpt] = optOwnMemPool->New(cgFunc);; + optimizations[kCombineContiLoadAndStoreOpt] = optOwnMemPool->New(cgFunc); + optimizations[kEliminateSpecifcSXTOpt] = optOwnMemPool->New(cgFunc); + optimizations[kEliminateSpecifcUXTOpt] = optOwnMemPool->New(cgFunc); + optimizations[kFmovRegOpt] = optOwnMemPool->New(cgFunc); + optimizations[kCbnzToCbzOpt] = optOwnMemPool->New(cgFunc); + optimizations[kContiLDRorSTRToSameMEMOpt] = optOwnMemPool->New(cgFunc); + optimizations[kRemoveIncDecRefOpt] = optOwnMemPool->New(cgFunc); + optimizations[kInlineReadBarriersOpt] = optOwnMemPool->New(cgFunc); + optimizations[kReplaceDivToMultiOpt] = optOwnMemPool->New(cgFunc); + optimizations[kAndCmpBranchesToCsetOpt] = optOwnMemPool->New(cgFunc); + optimizations[kZeroCmpBranchesOpt] = optOwnMemPool->New(cgFunc); +} + +void AArch64PeepHole::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + switch (thisMop) { + case MOP_wmovrr: + case MOP_xmovrr: + case MOP_xvmovs: + case MOP_xvmovd: { + (static_cast(optimizations[kRemoveMovingtoSameRegOpt]))->Run(bb, insn); + break; + } + case MOP_xldr: + case MOP_xstr: + case MOP_wldr: + case MOP_wstr: + case MOP_dldr: + case MOP_dstr: + case MOP_sldr: + case MOP_sstr: { + (static_cast(optimizations[kCombineContiLoadAndStoreOpt]))->Run(bb, insn); + (static_cast(optimizations[kContiLDRorSTRToSameMEMOpt]))->Run(bb, insn); + (static_cast(optimizations[kRemoveIdenticalLoadAndStoreOpt]))->Run(bb, insn); + break; + } + case MOP_xsxtb32: + case MOP_xsxth32: + case MOP_xsxtb64: + case MOP_xsxth64: + case MOP_xsxtw64: { + (static_cast(optimizations[kEliminateSpecifcSXTOpt]))->Run(bb, insn); + break; + } + case MOP_xuxtb32: + case MOP_xuxth32: + case MOP_xuxtw64: { + (static_cast(optimizations[kEliminateSpecifcUXTOpt]))->Run(bb, insn); + break; + } + case MOP_xvmovrv: + case MOP_xvmovrd: { + (static_cast(optimizations[kFmovRegOpt]))->Run(bb, insn); + break; + } + case MOP_wcbnz: + case MOP_xcbnz: { + (static_cast(optimizations[kCbnzToCbzOpt]))->Run(bb, insn); + break; + } + case MOP_xbl: { + (static_cast(optimizations[kRemoveIncDecRefOpt]))->Run(bb, insn); + break; + } + 
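+  /*
+   * Rough sketch of what the sdiv case below (ReplaceDivToMulti) rewrites: a signed 32-bit
+   * division by 100000, materialized as `mov w1, #0x86a0; movk w1, #1, LSL #16; sdiv w2, w0, w1`,
+   * is turned into a multiply-by-reciprocal sequence (smull by 0x4F8B588F, asr #32, add of the
+   * dividend, asr #17, plus an LSR #31 sign fix-up). The combined effect is
+   * q = floor(n * 0x1_4F8B_588F / 2^49) for n >= 0, where 0x1_4F8B_588F = ceil(2^49 / 100000);
+   * e.g. n = 123456789 gives q = 1234 = 123456789 / 100000.
+   */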
case MOP_wsdivrrr: { + (static_cast(optimizations[kReplaceDivToMultiOpt]))->Run(bb, insn); + break; + } + case MOP_wcsetrc: + case MOP_xcsetrc: { + (static_cast(optimizations[kAndCmpBranchesToCsetOpt]))->Run(bb, insn); + break; + } + default: + break; + } + if (GetReadBarrierName(insn) != "") { /* skip if it is not a read barrier call. */ + (static_cast(optimizations[kInlineReadBarriersOpt]))->Run(bb, insn); + } + if (&insn == bb.GetLastInsn()) { + (static_cast(optimizations[kZeroCmpBranchesOpt]))->Run(bb, insn); + } +} + +void AArch64PeepHole0::InitOpts() { + optimizations.resize(kPeepholeOptsNum); + optimizations[kRemoveIdenticalLoadAndStoreOpt] = optOwnMemPool->New(cgFunc); + optimizations[kCmpCsetOpt] = optOwnMemPool->New(cgFunc); + optimizations[kComplexMemOperandOptAdd] = optOwnMemPool->New(cgFunc); + optimizations[kDeleteMovAfterCbzOrCbnzOpt] = optOwnMemPool->New(cgFunc); +} + +void AArch64PeepHole0::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + switch (thisMop) { + case MOP_xstr: + case MOP_wstr: { + (static_cast(optimizations[kRemoveIdenticalLoadAndStoreOpt]))->Run(bb, insn); + break; + } + case MOP_wcmpri: + case MOP_xcmpri: { + (static_cast(optimizations[kCmpCsetOpt]))->Run(bb, insn); + break; + } + case MOP_xaddrrr: { + (static_cast(optimizations[kComplexMemOperandOptAdd]))->Run(bb, insn); + break; + } + case MOP_wcbz: + case MOP_xcbz: + case MOP_wcbnz: + case MOP_xcbnz: { + (static_cast(optimizations[kDeleteMovAfterCbzOrCbnzOpt]))->Run(bb, insn); + break; + } + default: + break; + } +} + +void AArch64PrePeepHole::InitOpts() { + optimizations.resize(kPeepholeOptsNum); + optimizations[kOneHoleBranchesPreOpt] = optOwnMemPool->New(cgFunc); + optimizations[kLoadFloatPointOpt] = optOwnMemPool->New(cgFunc); + optimizations[kReplaceOrrToMovOpt] = optOwnMemPool->New(cgFunc); + optimizations[kReplaceCmpToCmnOpt] = optOwnMemPool->New(cgFunc); + optimizations[kRemoveIncRefOpt] = optOwnMemPool->New(cgFunc); + optimizations[kLongIntCompareWithZOpt] = optOwnMemPool->New(cgFunc); + optimizations[kComplexMemOperandOpt] = optOwnMemPool->New(cgFunc); + optimizations[kComplexMemOperandPreOptAdd] = optOwnMemPool->New(cgFunc); + optimizations[kComplexMemOperandOptLSL] = optOwnMemPool->New(cgFunc); + optimizations[kComplexMemOperandOptLabel] = optOwnMemPool->New(cgFunc); + optimizations[kWriteFieldCallOpt] = optOwnMemPool->New(cgFunc); +} + +void AArch64PrePeepHole::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + switch (thisMop) { + case MOP_xmovzri16: { + (static_cast(optimizations[kLoadFloatPointOpt]))->Run(bb, insn); + break; + } + case MOP_wiorri12r: + case MOP_wiorrri12: + case MOP_xiorri13r: + case MOP_xiorrri13: { + (static_cast(optimizations[kReplaceOrrToMovOpt]))->Run(bb, insn); + break; + } + case MOP_xmovri32: + case MOP_xmovri64: { + (static_cast(optimizations[kReplaceCmpToCmnOpt]))->Run(bb, insn); + break; + } + case MOP_xbl: { + (static_cast(optimizations[kRemoveIncRefOpt]))->Run(bb, insn); + if (CGOptions::IsGCOnly() && CGOptions::DoWriteRefFieldOpt()) { + (static_cast(optimizations[kWriteFieldCallOpt]))->Run(bb, insn); + } + break; + } + case MOP_xcmpri: { + (static_cast(optimizations[kLongIntCompareWithZOpt]))->Run(bb, insn); + break; + } + case MOP_xadrpl12: { + (static_cast(optimizations[kComplexMemOperandOpt]))->Run(bb, insn); + break; + } + case MOP_xaddrrr: { + (static_cast(optimizations[kComplexMemOperandPreOptAdd]))->Run(bb, insn); + break; + } + case MOP_xaddrrrs: { + 
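+      /* kComplexMemOperandOptLSL presumably folds `add xD, xBase, xIdx, LSL #n` followed by a
+         load/store through xD into a single scaled-index addressing mode [xBase, xIdx, LSL #n] */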
(static_cast(optimizations[kComplexMemOperandOptLSL]))->Run(bb, insn); + break; + } + case MOP_xldli: { + (static_cast(optimizations[kComplexMemOperandOptLabel]))->Run(bb, insn); + break; + } + default: + break; + } + if (&insn == bb.GetLastInsn()) { + (static_cast(optimizations[kOneHoleBranchesPreOpt]))->Run(bb, insn); + if (CGOptions::IsGCOnly() && CGOptions::DoWriteRefFieldOpt()) { + (static_cast(optimizations[kWriteFieldCallOpt]))->Reset(); + } + } +} + +void AArch64PrePeepHole1::InitOpts() { + optimizations.resize(kPeepholeOptsNum); + optimizations[kRemoveDecRefOpt] = optOwnMemPool->New(cgFunc); + optimizations[kComputationTreeOpt] = optOwnMemPool->New(cgFunc); + optimizations[kOneHoleBranchesOpt] = optOwnMemPool->New(cgFunc); + optimizations[kReplaceIncDecWithIncOpt] = optOwnMemPool->New(cgFunc); + optimizations[kAndCmpBranchesToTbzOpt] = optOwnMemPool->New(cgFunc); +} + +void AArch64PrePeepHole1::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + switch (thisMop) { + case MOP_xbl: { + if (JAVALANG) { + (static_cast(optimizations[kRemoveDecRefOpt]))->Run(bb, insn); + (static_cast(optimizations[kReplaceIncDecWithIncOpt]))->Run(bb, insn); + } + break; + } + case MOP_xaddrri12: { + (static_cast(optimizations[kComputationTreeOpt]))->Run(bb, insn); + break; + } + default: + break; + } + if (&insn == bb.GetLastInsn()) { + switch (thisMop) { + case MOP_wcbz: + case MOP_wcbnz: + case MOP_xcbz: + case MOP_xcbnz: { + (static_cast(optimizations[kOneHoleBranchesOpt]))->Run(bb, insn); + break; + } + case MOP_beq: + case MOP_bne: { + (static_cast(optimizations[kAndCmpBranchesToTbzOpt]))->Run(bb, insn); + break; + } + default: + break; + } + } +} + +void RemoveIdenticalLoadAndStoreAArch64::Run(BB &bb, Insn &insn) { + Insn *nextInsn = insn.GetNext(); + if (nextInsn == nullptr) { + return; + } + MOperator mop1 = insn.GetMachineOpcode(); + MOperator mop2 = nextInsn->GetMachineOpcode(); + if ((mop1 == MOP_wstr && mop2 == MOP_wstr) || (mop1 == MOP_xstr && mop2 == MOP_xstr)) { + if (IsMemOperandsIdentical(insn, *nextInsn)) { + bb.RemoveInsn(insn); + insn = *nextInsn; + } + } else if ((mop1 == MOP_wstr && mop2 == MOP_wldr) || (mop1 == MOP_xstr && mop2 == MOP_xldr)) { + if (IsMemOperandsIdentical(insn, *nextInsn)) { + bb.RemoveInsn(*nextInsn); + } + } +} + +bool RemoveIdenticalLoadAndStoreAArch64::IsMemOperandsIdentical(const Insn &insn1, const Insn &insn2) const { + regno_t regNO1 = static_cast(insn1.GetOperand(kInsnFirstOpnd)).GetRegisterNumber(); + regno_t regNO2 = static_cast(insn2.GetOperand(kInsnFirstOpnd)).GetRegisterNumber(); + if (regNO1 != regNO2) { + return false; + } + /* Match only [base + offset] */ + auto &memOpnd1 = static_cast(insn1.GetOperand(kInsnSecondOpnd)); + if (static_cast(memOpnd1).GetAddrMode() != AArch64MemOperand::kAddrModeBOi) { + return false; + } + auto &memOpnd2 = static_cast(insn2.GetOperand(kInsnSecondOpnd)); + if (static_cast(memOpnd2).GetAddrMode() != AArch64MemOperand::kAddrModeBOi) { + return false; + } + Operand *base1 = memOpnd1.GetBaseRegister(); + Operand *base2 = memOpnd2.GetBaseRegister(); + if (!((base1 != nullptr) && base1->IsRegister()) || !((base2 != nullptr) && base2->IsRegister())) { + return false; + } + + regno_t baseRegNO1 = static_cast(base1)->GetRegisterNumber(); + /* First insn re-write base addr reg1 <- [ reg1 + offset ] */ + if (baseRegNO1 == regNO1) { + return false; + } + + regno_t baseRegNO2 = static_cast(base2)->GetRegisterNumber(); + if (baseRegNO1 != baseRegNO2) { + return false; + } + + if 
(static_cast(memOpnd1).GetOffsetImmediate()->GetOffsetValue() != + static_cast(memOpnd2).GetOffsetImmediate()->GetOffsetValue()) { + return false; + } + return true; +} + +void RemoveMovingtoSameRegAArch64::Run(BB &bb, Insn &insn) { + ASSERT(insn.GetOperand(kInsnFirstOpnd).IsRegister(), "expects registers"); + ASSERT(insn.GetOperand(kInsnSecondOpnd).IsRegister(), "expects registers"); + auto ®1 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + auto ®2 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + + if ((reg1.GetRegisterNumber() == reg2.GetRegisterNumber()) && (reg1.GetSize() == reg2.GetSize())) { + bb.RemoveInsn(insn); + } +} + +/* Combining 2 STRs into 1 stp or 2 LDRs into 1 ldp */ +void CombineContiLoadAndStoreAArch64::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + Insn *nextInsn = insn.GetNext(); + if (nextInsn == nullptr || nextInsn->GetMachineOpcode() != thisMop) { + return; + } + ASSERT(insn.GetOperand(kInsnSecondOpnd).IsMemoryAccessOperand(), "expects mem operands"); + ASSERT(nextInsn->GetOperand(kInsnSecondOpnd).IsMemoryAccessOperand(), "expects mem operands"); + auto &memOpnd1 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + + AArch64MemOperand::AArch64AddressingMode addrMode1 = memOpnd1.GetAddrMode(); + if (addrMode1 != AArch64MemOperand::kAddrModeBOi || (!memOpnd1.IsIntactIndexed())) { + return; + } + + auto *base1 = static_cast(memOpnd1.GetBaseRegister()); + ASSERT(base1 == nullptr || !base1->IsVirtualRegister(), "physical register has not been allocated?"); + AArch64OfstOperand *offset1 = memOpnd1.GetOffsetImmediate(); + + auto &memOpnd2 = static_cast(nextInsn->GetOperand(kInsnSecondOpnd)); + + AArch64MemOperand::AArch64AddressingMode addrMode2 = memOpnd2.GetAddrMode(); + if (addrMode2 != AArch64MemOperand::kAddrModeBOi || (!memOpnd2.IsIntactIndexed())) { + return; + } + + auto *base2 = static_cast(memOpnd2.GetBaseRegister()); + ASSERT(base2 == nullptr || !base2->IsVirtualRegister(), "physical register has not been allocated?"); + AArch64OfstOperand *offset2 = memOpnd2.GetOffsetImmediate(); + + if (base1 == nullptr || base2 == nullptr || offset1 == nullptr || offset2 == nullptr) { + return; + } + + /* + * In ARM Architecture Reference Manual ARMv8, for ARMv8-A architecture profile + * LDP on page K1-6125 delcare that ldp can't use same reg + */ + auto ®1 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + auto ®2 = static_cast(nextInsn->GetOperand(kInsnFirstOpnd)); + if ((thisMop == MOP_xldr || thisMop == MOP_sldr || thisMop == MOP_dldr || thisMop == MOP_wldr) && + reg1.GetRegisterNumber() == reg2.GetRegisterNumber()) { + return; + } + + if (reg1.GetSize() != memOpnd1.GetSize() || reg2.GetSize() != memOpnd2.GetSize()) { + return; + } + + uint32 size = reg1.GetSize() >> kLog2BitsPerByte; + int offsetVal1 = offset1->GetOffsetValue(); + int offsetVal2 = offset2->GetOffsetValue(); + if ((base1->GetRegisterNumber() == RFP || base1->GetRegisterNumber() == RSP) && + base1->GetRegisterNumber() == base2->GetRegisterNumber() && + reg1.GetRegisterType() == reg2.GetRegisterType() && reg1.GetSize() == reg2.GetSize() && + abs(offsetVal1 - offsetVal2) == static_cast(size)) { + /* pair instr for 8/4 byte registers must have multiple of 8/4 for imm */ + if ((static_cast(offsetVal1) % size) != 0) { + return; + } + /* For stp/ldp, the imm should be within -512 and 504. 
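+       * (that is the scaled imm7 range for 64-bit registers; the 32-bit bounds checked just
+       *  below presumably correspond to -256 and 252)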
*/ + if (size == kIntregBytelen) { + if (offsetVal1 <= kStpLdpImm64LowerBound || offsetVal1 >= kStpLdpImm64UpperBound) { + return; + } + } + if (size == (kIntregBytelen >> 1)) { + if (offsetVal1 <= kStpLdpImm32LowerBound || offsetVal1 >= kStpLdpImm32UpperBound) { + return; + } + } + + MOperator mopPair = GetMopPair(thisMop); + CG *cg = cgFunc.GetCG(); + if (offsetVal1 < offsetVal2) { + bb.InsertInsnAfter(*nextInsn, cg->BuildInstruction(mopPair, reg1, reg2, memOpnd1)); + } else { + bb.InsertInsnAfter(*nextInsn, cg->BuildInstruction(mopPair, reg2, reg1, memOpnd2)); + } + + /* keep the comment */ + Insn *nn = nextInsn->GetNext(); + std::string newComment = ""; + MapleString comment = insn.GetComment(); + if (comment.c_str() != nullptr && strlen(comment.c_str()) > 0) { + newComment += comment.c_str(); + } + comment = nextInsn->GetComment(); + if (newComment.c_str() != nullptr && strlen(newComment.c_str()) > 0) { + newComment += " "; + } + if (comment.c_str() != nullptr && strlen(comment.c_str()) > 0) { + newComment += comment.c_str(); + } + if (newComment.c_str() != nullptr && strlen(newComment.c_str()) > 0) { + nn->SetComment(newComment); + } + bb.RemoveInsn(insn); + bb.RemoveInsn(*nextInsn); + insn = *nn; + } /* pattern found */ +} + +void EliminateSpecifcSXTAArch64::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + Insn *prevInsn = insn.GetPrev(); + while (prevInsn != nullptr && !prevInsn->GetMachineOpcode()) { + prevInsn = prevInsn->GetPrev(); + } + if (prevInsn == nullptr) { + return; + } + auto ®Opnd0 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + auto ®Opnd1 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + if (&insn != bb.GetFirstInsn() && regOpnd0.GetRegisterNumber() == regOpnd1.GetRegisterNumber() && + prevInsn->IsMachineInstruction()) { + if (prevInsn->GetMachineOpcode() == MOP_xmovri32 || prevInsn->GetMachineOpcode() == MOP_xmovri64) { + auto &dstMovOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstMovOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + Operand &opnd = prevInsn->GetOperand(kInsnSecondOpnd); + if (opnd.IsIntImmediate()) { + auto &immOpnd = static_cast(opnd); + int64 value = immOpnd.GetValue(); + if (thisMop == MOP_xsxtb32) { + /* value should in range between -127 and 127 */ + if (value >= static_cast(0xFFFFFFFFFFFFFF80) && value <= 0x7F && + immOpnd.IsSingleInstructionMovable(regOpnd0.GetSize())) { + bb.RemoveInsn(insn); + } + } else if (thisMop == MOP_xsxth32) { + /* value should in range between -32678 and 32678 */ + if (value >= static_cast(0xFFFFFFFFFFFF8000) && value <= 0x7FFF && + immOpnd.IsSingleInstructionMovable(regOpnd0.GetSize())) { + bb.RemoveInsn(insn); + } + } else { + uint64 flag = 0xFFFFFFFFFFFFFF80; /* initialize the flag with fifty-nine 1s at top */ + if (thisMop == MOP_xsxth64) { + flag = 0xFFFFFFFFFFFF8000; /* specify the flag with forty-nine 1s at top in this case */ + } else if (thisMop == MOP_xsxtw64) { + flag = 0xFFFFFFFF80000000; /* specify the flag with thirty-three 1s at top in this case */ + } + if (!(static_cast(value) & flag) && immOpnd.IsSingleInstructionMovable(regOpnd0.GetSize())) { + auto *aarch64CGFunc = static_cast(&cgFunc); + RegOperand &dstOpnd = aarch64CGFunc->GetOrCreatePhysicalRegisterOperand( + static_cast(dstMovOpnd.GetRegisterNumber()), k64BitSize, dstMovOpnd.GetRegisterType()); + prevInsn->SetOperand(kInsnFirstOpnd, dstOpnd); + prevInsn->SetMOperator(MOP_xmovri64); + bb.RemoveInsn(insn); + } + } + } + } else if (prevInsn->GetMachineOpcode() == 
MOP_wldrsb) { + auto &dstMovOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstMovOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + if (thisMop == MOP_xsxtb32) { + bb.RemoveInsn(insn); + } + } else if (prevInsn->GetMachineOpcode() == MOP_wldrsh) { + auto &dstMovOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstMovOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + if (thisMop == MOP_xsxth32) { + bb.RemoveInsn(insn); + } + } + } +} + +void EliminateSpecifcUXTAArch64::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr) { + return; + } + auto ®Opnd0 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + auto ®Opnd1 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + if (prevInsn->IsCall() && + regOpnd0.GetRegisterNumber() == regOpnd1.GetRegisterNumber() && + (regOpnd1.GetRegisterNumber() == R0 || regOpnd1.GetRegisterNumber() == V0)) { + uint32 retSize = prevInsn->GetRetSize(); + if (retSize > 0 && + ((thisMop == MOP_xuxtb32 && retSize <= k1ByteSize) || + (thisMop == MOP_xuxth32 && retSize <= k2ByteSize) || + (thisMop == MOP_xuxtw64 && retSize <= k4ByteSize))) { + bb.RemoveInsn(insn); + } + return; + } + if (&insn == bb.GetFirstInsn() || regOpnd0.GetRegisterNumber() != regOpnd1.GetRegisterNumber() || + !prevInsn->IsMachineInstruction()) { + return; + } + if (cgFunc.GetMirModule().GetSrcLang() == kSrcLangC && prevInsn->IsCall() && prevInsn->GetIsCallReturnSigned()) { + return; + } + if (thisMop == MOP_xuxtb32) { + if (prevInsn->GetMachineOpcode() == MOP_xmovri32 || prevInsn->GetMachineOpcode() == MOP_xmovri64) { + auto &dstMovOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstMovOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + Operand &opnd = prevInsn->GetOperand(kInsnSecondOpnd); + if (opnd.IsIntImmediate()) { + auto &immOpnd = static_cast(opnd); + int64 value = immOpnd.GetValue(); + /* check the top 56 bits of value */ + if (!(static_cast(value) & 0xFFFFFFFFFFFFFF00)) { + bb.RemoveInsn(insn); + } + } + } else if (prevInsn->GetMachineOpcode() == MOP_wldrb) { + auto &dstOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + bb.RemoveInsn(insn); + } + } else if (thisMop == MOP_xuxth32) { + if (prevInsn->GetMachineOpcode() == MOP_xmovri32 || prevInsn->GetMachineOpcode() == MOP_xmovri64) { + Operand &opnd = prevInsn->GetOperand(kInsnSecondOpnd); + if (opnd.IsIntImmediate()) { + auto &immOpnd = static_cast(opnd); + int64 value = immOpnd.GetValue(); + if (!(static_cast(value) & 0xFFFFFFFFFFFF0000)) { + bb.RemoveInsn(insn); + } + } + } else if (prevInsn->GetMachineOpcode() == MOP_wldrh) { + auto &dstOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + bb.RemoveInsn(insn); + } + } else { + /* this_mop == MOP_xuxtw64 */ + if (prevInsn->GetMachineOpcode() == MOP_xmovri32 || prevInsn->GetMachineOpcode() == MOP_wldrsb || + prevInsn->GetMachineOpcode() == MOP_wldrb || prevInsn->GetMachineOpcode() == MOP_wldrsh || + prevInsn->GetMachineOpcode() == MOP_wldrh || prevInsn->GetMachineOpcode() == MOP_wldr) { + auto &dstOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + /* 32-bit ldr does zero-extension by default, so this conversion 
can be skipped */ + bb.RemoveInsn(insn); + } + } +} + +void FmovRegAArch64::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + Insn *nextInsn = insn.GetNext(); + if (&insn == bb.GetFirstInsn()) { + return; + } + Insn *prevInsn = insn.GetPrev(); + MOperator prevMop = prevInsn->GetMachineOpcode(); + MOperator newMop; + uint32 doOpt = 0; + if (prevMop == MOP_xvmovrv && thisMop == MOP_xvmovrv) { + doOpt = k32BitSize; + newMop = MOP_wmovrr; + } else if (prevMop == MOP_xvmovrd && thisMop == MOP_xvmovrd) { + doOpt = k64BitSize; + newMop = MOP_xmovrr; + } + if (doOpt == 0) { + return; + } + auto &curSrcRegOpnd = static_cast(insn.GetOperand(kInsnSecondOpnd)); + auto &prevSrcRegOpnd = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + /* same src freg */ + if (curSrcRegOpnd.GetRegisterNumber() != prevSrcRegOpnd.GetRegisterNumber()) { + return; + } + auto &curDstRegOpnd = static_cast(insn.GetOperand(kInsnFirstOpnd)); + regno_t curDstReg = curDstRegOpnd.GetRegisterNumber(); + CG *cg = cgFunc.GetCG(); + /* optimize case 1 */ + auto &prevDstRegOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + regno_t prevDstReg = prevDstRegOpnd.GetRegisterNumber(); + auto *aarch64CGFunc = static_cast(&cgFunc); + RegOperand &dst = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(curDstReg), doOpt, kRegTyInt); + RegOperand &src = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(prevDstReg), doOpt, kRegTyInt); + Insn &newInsn = cg->BuildInstruction(newMop, dst, src); + bb.InsertInsnBefore(insn, newInsn); + bb.RemoveInsn(insn); + if (nextInsn == nullptr) { + return; + } + RegOperand &newOpnd = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(prevDstReg), doOpt, kRegTyInt); + uint32 opndNum = nextInsn->GetOperandSize(); + for (uint32 opndIdx = 0; opndIdx < opndNum; ++opndIdx) { + Operand &opnd = nextInsn->GetOperand(opndIdx); + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + Operand *base = memOpnd.GetBaseRegister(); + if (base != nullptr) { + if (base->IsRegister()) { + auto *reg = static_cast(base); + if (reg->GetRegisterNumber() == curDstReg) { + memOpnd.SetBaseRegister(newOpnd); + } + } + } + Operand *offset = memOpnd.GetIndexRegister(); + if (offset != nullptr) { + if (offset->IsRegister()) { + auto *reg = static_cast(offset); + if (reg->GetRegisterNumber() == curDstReg) { + memOpnd.SetIndexRegister(newOpnd); + } + } + } + } else if (opnd.IsRegister()) { + /* Check if it is a source operand. */ + const AArch64MD *md = &AArch64CG::kMd[static_cast(nextInsn)->GetMachineOpcode()]; + auto *regProp = static_cast(md->operand[opndIdx]); + if (regProp->IsUse()) { + auto ® = static_cast(opnd); + if (reg.GetRegisterNumber() == curDstReg) { + nextInsn->SetOperand(opndIdx, newOpnd); + } + } + } + } +} + +void CbnzToCbzAArch64::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + /* reg has to be R0, since return value is in R0 */ + auto ®Opnd0 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (regOpnd0.GetRegisterNumber() != R0) { + return; + } + BB *nextBB = bb.GetNext(); + /* Make sure nextBB can only be reached by bb */ + if (nextBB->GetPreds().size() > 1 || nextBB->GetEhPreds().empty()) { + return; + } + BB *targetBB = nullptr; + auto it = bb.GetSuccsBegin(); + if (*it == nextBB) { + ++it; + } + targetBB = *it; + /* Make sure when nextBB is empty, targetBB is fallthru of bb. */ + if (targetBB != nextBB->GetNext()) { + return; + } + /* Is nextBB branch to the return-bb? 
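+   * (overall, the shape being matched here is roughly:
+   *      cbnz w0, .Ltarget
+   *    next:  mov w0, #0
+   *           b   .Lreturn
+   *    .Ltarget: ...
+   *  and it gets rewritten to `cbz w0, .Lreturn` with the mov/b pair removed, which is safe
+   *  because w0 is already zero on the path that used to execute the mov)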
*/ + if (nextBB->GetSuccs().size() != 1) { + return; + } + BB *nextBBTarget = *(nextBB->GetSuccsBegin()); + if (nextBBTarget->GetKind() != BB::kBBReturn) { + return; + } + /* Next insn should be a mov R0 = 0 */ + Insn *movInsn = nextBB->GetFirstMachineInsn(); + if (movInsn == nullptr) { + return; + } + MOperator movInsnMop = movInsn->GetMachineOpcode(); + if (movInsnMop != MOP_xmovri32 && movInsnMop != MOP_xmovri64) { + return; + } + auto &movDest = static_cast(movInsn->GetOperand(kInsnFirstOpnd)); + if (movDest.GetRegisterNumber() != R0) { + return; + } + auto &movImm = static_cast(movInsn->GetOperand(kInsnSecondOpnd)); + if (movImm.GetValue() != 0) { + return; + } + Insn *brInsn = movInsn->GetNextMachineInsn(); + if (brInsn == nullptr) { + return; + } + if (brInsn->GetMachineOpcode() != MOP_xuncond) { + return; + } + /* Control flow looks nice, instruction looks nice */ + Operand &brTarget = brInsn->GetOperand(kInsnFirstOpnd); + insn.SetOperand(kInsnSecondOpnd, brTarget); + if (thisMop == MOP_wcbnz) { + insn.SetMOP(MOP_wcbz); + } else { + insn.SetMOP(MOP_xcbz); + } + nextBB->RemoveInsn(*movInsn); + nextBB->RemoveInsn(*brInsn); + /* nextBB is now a fallthru bb, not a goto bb */ + nextBB->SetKind(BB::kBBFallthru); + /* + * fix control flow, we have bb, nextBB, targetBB, nextBB_target + * connect bb -> nextBB_target erase targetBB + */ + it = bb.GetSuccsBegin(); + CHECK_FATAL(it != bb.GetSuccsEnd(), "succs is empty."); + if (*it == targetBB) { + bb.EraseSuccs(it); + bb.PushFrontSuccs(*nextBBTarget); + } else { + ++it; + bb.EraseSuccs(it); + bb.PushBackSuccs(*nextBBTarget); + } + for (auto targetBBIt = targetBB->GetPredsBegin(); targetBBIt != targetBB->GetPredsEnd(); ++targetBBIt) { + if (*targetBBIt == &bb) { + targetBB->ErasePreds(targetBBIt); + break; + } + } + for (auto nextIt = nextBBTarget->GetPredsBegin(); nextIt != nextBBTarget->GetPredsEnd(); ++nextIt) { + if (*nextIt == nextBB) { + nextBBTarget->ErasePreds(nextIt); + break; + } + } + nextBBTarget->PushBackPreds(bb); + + /* nextBB has no target, originally just branch target */ + nextBB->EraseSuccs(nextBB->GetSuccsBegin()); + ASSERT(nextBB->GetSuccs().empty(), "peep: branch target incorrect"); + /* Now make nextBB fallthru to targetBB */ + nextBB->PushFrontSuccs(*targetBB); + targetBB->PushBackPreds(*nextBB); +} + +void ContiLDRorSTRToSameMEMAArch64::Run(BB &bb, Insn &insn) { + Insn *prevInsn = insn.GetPrev(); + while (prevInsn != nullptr && !prevInsn->GetMachineOpcode() && prevInsn != bb.GetFirstInsn()) { + prevInsn = prevInsn->GetPrev(); + } + if (!insn.IsMachineInstruction() || prevInsn == nullptr) { + return; + } + bool loadAfterStore = false; + bool loadAfterLoad = false; + MOperator thisMop = insn.GetMachineOpcode(); + MOperator prevMop = prevInsn->GetMachineOpcode(); + /* + * store regB, RegC, offset + * load regA, RegC, offset + */ + if ((thisMop == MOP_xldr && prevMop == MOP_xstr) || (thisMop == MOP_wldr && prevMop == MOP_wstr) || + (thisMop == MOP_dldr && prevMop == MOP_dstr) || (thisMop == MOP_sldr && prevMop == MOP_sstr)) { + loadAfterStore = true; + } + /* + * load regA, RegC, offset + * load regB, RegC, offset + */ + if ((thisMop == MOP_xldr || thisMop == MOP_wldr || thisMop == MOP_dldr || thisMop == MOP_sldr) && + prevMop == thisMop) { + loadAfterLoad = true; + } + if (!loadAfterStore && !loadAfterLoad) { + return; + } + ASSERT(insn.GetOperand(kInsnSecondOpnd).IsMemoryAccessOperand(), "expects mem operands"); + ASSERT(prevInsn->GetOperand(kInsnSecondOpnd).IsMemoryAccessOperand(), "expects mem operands"); + + auto 
&memOpnd1 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + AArch64MemOperand::AArch64AddressingMode addrMode1 = memOpnd1.GetAddrMode(); + if (addrMode1 != AArch64MemOperand::kAddrModeBOi || (!memOpnd1.IsIntactIndexed())) { + return; + } + + auto *base1 = static_cast(memOpnd1.GetBaseRegister()); + ASSERT(base1 == nullptr || !base1->IsVirtualRegister(), "physical register has not been allocated?"); + AArch64OfstOperand *offset1 = memOpnd1.GetOffsetImmediate(); + + auto &memOpnd2 = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + AArch64MemOperand::AArch64AddressingMode addrMode2 = memOpnd2.GetAddrMode(); + if (addrMode2 != AArch64MemOperand::kAddrModeBOi || (!memOpnd2.IsIntactIndexed())) { + return; + } + + auto *base2 = static_cast(memOpnd2.GetBaseRegister()); + ASSERT(base2 == nullptr || !base2->IsVirtualRegister(), "physical register has not been allocated?"); + AArch64OfstOperand *offset2 = memOpnd2.GetOffsetImmediate(); + + if (base1 == nullptr || base2 == nullptr || offset1 == nullptr || offset2 == nullptr) { + return; + } + + auto ®1 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + auto ®2 = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + int offsetVal1 = offset1->GetOffsetValue(); + int offsetVal2 = offset2->GetOffsetValue(); + if (base1->GetRegisterNumber() != base2->GetRegisterNumber() || + reg1.GetRegisterType() != reg2.GetRegisterType() || reg1.GetSize() != reg2.GetSize() || + offsetVal1 != offsetVal2) { + return; + } + if (loadAfterStore && reg1.GetRegisterNumber() != reg2.GetRegisterNumber()) { + /* replace it with mov */ + MOperator newOp = MOP_wmovrr; + if (reg1.GetRegisterType() == kRegTyInt) { + newOp = (reg1.GetSize() <= k32BitSize) ? MOP_wmovrr : MOP_xmovrr; + } else if (reg1.GetRegisterType() == kRegTyFloat) { + newOp = (reg1.GetSize() <= k32BitSize) ? MOP_xvmovs : MOP_xvmovd; + } + CG *cg = cgFunc.GetCG(); + bb.InsertInsnAfter(*prevInsn, cg->BuildInstruction(newOp, reg1, reg2)); + bb.RemoveInsn(insn); + insn = *(prevInsn->GetNext()); + } else if (reg1.GetRegisterNumber() == reg2.GetRegisterNumber() && + base1->GetRegisterNumber() != reg2.GetRegisterNumber()) { + bb.RemoveInsn(insn); + insn = *prevInsn; + } +} + +void RemoveIncDecRefAArch64::Run(BB &bb, Insn &insn) { + ASSERT(insn.GetMachineOpcode() == MOP_xbl, "expect a xbl MOP at RemoveIncDecRef optimization"); + auto &target = static_cast(insn.GetOperand(kInsnFirstOpnd)); + Insn *insnMov = insn.GetPreviousMachineInsn(); + if (insnMov == nullptr) { + return; + } + MOperator mopMov = insnMov->GetMachineOpcode(); + if (target.GetName() == "MCC_IncDecRef_NaiveRCFast" && mopMov == MOP_xmovrr && + static_cast(insnMov->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() == R1 && + static_cast(insnMov->GetOperand(kInsnSecondOpnd)).GetRegisterNumber() == R0) { + bb.RemoveInsn(*insnMov); + bb.RemoveInsn(insn); + bb.SetKind(BB::kBBFallthru); + } +} + +#ifdef USE_32BIT_REF +constexpr uint32 kRefSize = 32; +#else +constexpr uint32 kRefSize = 64; +#endif + +void InlineReadBarriersAArch64::Run(BB &bb, Insn &insn) { + if (!CGOptions::IsGCOnly()) { /* Inline read barriers only enabled for GCONLY. */ + return; + } + const std::string &barrierName = GetReadBarrierName(insn); + CG *cg = cgFunc.GetCG(); + if (barrierName == kMccDummy) { + /* remove dummy call. */ + bb.RemoveInsn(insn); + } else { + /* replace barrier function call with load instruction. 
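+     * e.g. a `bl MCC_LoadRefField` call is replaced by a plain `ldr w0/x0, [x1]`; the volatile
+     * variants use `ldar` instead, and the static variants load through x0 rather than x1.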
*/ + bool isVolatile = (barrierName == kMccLoadRefV || barrierName == kMccLoadRefVS); + bool isStatic = (barrierName == kMccLoadRefS || barrierName == kMccLoadRefVS); + /* refSize is 32 if USE_32BIT_REF defined, otherwise 64. */ + const uint32 refSize = kRefSize; + auto *aarch64CGFunc = static_cast(&cgFunc); + MOperator loadOp = GetLoadOperator(refSize, isVolatile); + RegOperand ®Op = aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(R0, refSize, kRegTyInt); + AArch64reg addrReg = isStatic ? R0 : R1; + MemOperand &addr = aarch64CGFunc->CreateMemOpnd(addrReg, 0, refSize); + Insn &loadInsn = cg->BuildInstruction(loadOp, regOp, addr); + bb.ReplaceInsn(insn, loadInsn); + } + bb.SetKind(BB::kBBFallthru); + bool isTailCall = (insn.GetMachineOpcode() == MOP_tail_call_opt_xbl); + if (isTailCall) { + /* add 'ret' instruction for tail call optimized load barrier. */ + Insn &retInsn = cg->BuildInstruction(MOP_xret); + bb.AppendInsn(retInsn); + bb.SetKind(BB::kBBReturn); + } +} + +void ReplaceDivToMultiAArch64::Run(BB &bb, Insn &insn) { + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr) { + return; + } + Insn *prePrevInsn = prevInsn->GetPreviousMachineInsn(); + auto &sdivOpnd1 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + auto &sdivOpnd2 = static_cast(insn.GetOperand(kInsnThirdOpnd)); + if (sdivOpnd1.GetRegisterNumber() == sdivOpnd2.GetRegisterNumber() || sdivOpnd1.GetRegisterNumber() == R16 || + sdivOpnd2.GetRegisterNumber() == R16 || prePrevInsn == nullptr) { + return; + } + MOperator prevMop = prevInsn->GetMachineOpcode(); + MOperator prePrevMop = prePrevInsn->GetMachineOpcode(); + if (prevMop && (prevMop == MOP_wmovkri16) && prePrevMop && (prePrevMop == MOP_xmovri32)) { + /* Check if dest operand of insn is idential with register of prevInsn and prePrevInsn. 
*/ + if ((&(prevInsn->GetOperand(kInsnFirstOpnd)) != &sdivOpnd2) || + (&(prePrevInsn->GetOperand(kInsnFirstOpnd)) != &sdivOpnd2)) { + return; + } + auto &prevLsl = static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + if (prevLsl.GetShiftAmount() != k16BitSize) { + return; + } + auto &prevImmOpnd = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + auto &prePrevImmOpnd = static_cast(prePrevInsn->GetOperand(kInsnSecondOpnd)); + /* + * expect the immediate value of first mov is 0x086A0 which matches 0x186A0 + * because 0x10000 is ignored in 32 bits register + */ + if ((prevImmOpnd.GetValue() != 1) || (prePrevImmOpnd.GetValue() != 34464)) { + return; + } + auto *aarch64CGFunc = static_cast(&cgFunc); + CG *cg = cgFunc.GetCG(); + /* mov w16, #0x588f */ + RegOperand &tempOpnd = aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(R16), + k64BitSize, kRegTyInt); + /* create a immedate operand with this specific value */ + ImmOperand &multiplierLow = aarch64CGFunc->CreateImmOperand(0x588f, k32BitSize, false); + Insn &multiplierLowInsn = cg->BuildInstruction(MOP_xmovri32, tempOpnd, multiplierLow); + bb.InsertInsnBefore(*prePrevInsn, multiplierLowInsn); + + /* + * movk w16, #0x4f8b, LSL #16 + * create a immedate operand with this specific value + */ + ImmOperand &multiplierHigh = aarch64CGFunc->CreateImmOperand(0x4f8b, k32BitSize, false); + LogicalShiftLeftOperand *multiplierHighLsl = aarch64CGFunc->GetLogicalShiftLeftOperand(k16BitSize, true); + Insn &multiplierHighInsn = + cg->BuildInstruction(MOP_wmovkri16, tempOpnd, multiplierHigh, *multiplierHighLsl); + bb.InsertInsnBefore(*prePrevInsn, multiplierHighInsn); + + /* smull x16, w0, w16 */ + Insn &newSmullInsn = + cg->BuildInstruction(MOP_xsmullrrr, tempOpnd, sdivOpnd1, tempOpnd); + bb.InsertInsnBefore(*prePrevInsn, newSmullInsn); + + /* asr x16, x16, #32 */ + ImmOperand &dstLsrImmHigh = aarch64CGFunc->CreateImmOperand(k32BitSize, k32BitSize, false); + Insn &dstLsrInsnHigh = + cg->BuildInstruction(MOP_xasrrri6, tempOpnd, tempOpnd, dstLsrImmHigh); + bb.InsertInsnBefore(*prePrevInsn, dstLsrInsnHigh); + + /* add x16, x16, w0, SXTW */ + Operand &sxtw = aarch64CGFunc->CreateExtendShiftOperand(ExtendShiftOperand::kSXTW, 0, 3); + Insn &addInsn = + cg->BuildInstruction(MOP_xxwaddrrre, tempOpnd, tempOpnd, sdivOpnd1, sxtw); + bb.InsertInsnBefore(*prePrevInsn, addInsn); + + /* asr x16, x16, #17 */ + ImmOperand &dstLsrImmChange = aarch64CGFunc->CreateImmOperand(17, k32BitSize, false); + Insn &dstLsrInsnChange = + cg->BuildInstruction(MOP_xasrrri6, tempOpnd, tempOpnd, dstLsrImmChange); + bb.InsertInsnBefore(*prePrevInsn, dstLsrInsnChange); + + /* add x2, x16, x0, LSR #31 */ + auto &sdivOpnd0 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + regno_t sdivOpnd0RegNO = sdivOpnd0.GetRegisterNumber(); + RegOperand &extendSdivOpnd0 = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(sdivOpnd0RegNO), + k64BitSize, kRegTyInt); + + regno_t sdivOpnd1RegNum = sdivOpnd1.GetRegisterNumber(); + RegOperand &extendSdivOpnd1 = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(sdivOpnd1RegNum), + k64BitSize, kRegTyInt); + /* shift bit amount is thirty-one at this insn */ + BitShiftOperand &addLsrOpnd = aarch64CGFunc->CreateBitShiftOperand(BitShiftOperand::kLSR, 31, 6); + Insn &addLsrInsn = cg->BuildInstruction(MOP_xaddrrrs, extendSdivOpnd0, tempOpnd, + extendSdivOpnd1, addLsrOpnd); + bb.InsertInsnBefore(*prePrevInsn, addLsrInsn); + + /* + * remove insns + * Check if x1 is used after sdiv insn, and if it is in live-out. 
+ */ + if (sdivOpnd2.GetRegisterNumber() != sdivOpnd0.GetRegisterNumber()) { + if (IfOperandIsLiveAfterInsn(sdivOpnd2, insn)) { + /* Only remove div instruction. */ + bb.RemoveInsn(insn); + return; + } + } + + bb.RemoveInsn(*prePrevInsn); + bb.RemoveInsn(*prevInsn); + bb.RemoveInsn(insn); + } +} + +void AndCmpBranchesToCsetAArch64::Run(BB &bb, Insn &insn) { + /* prevInsn must be "cmp" insn */ + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr || + (prevInsn->GetMachineOpcode() != MOP_wcmpri && prevInsn->GetMachineOpcode() != MOP_xcmpri)) { + return; + } + /* prevPrevInsn must be "and" insn */ + Insn *prevPrevInsn = prevInsn->GetPreviousMachineInsn(); + if (prevPrevInsn == nullptr || + (prevPrevInsn->GetMachineOpcode() != MOP_wandrri12 && prevPrevInsn->GetMachineOpcode() != MOP_xandrri13)) { + return; + } + + auto &csetCond = static_cast(insn.GetOperand(kInsnSecondOpnd)); + auto &cmpImm = static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + int64 cmpImmVal = cmpImm.GetValue(); + auto &andImm = static_cast(prevPrevInsn->GetOperand(kInsnThirdOpnd)); + int64 andImmVal = andImm.GetValue(); + if ((csetCond.GetCode() == CC_EQ && cmpImmVal == andImmVal) || + (csetCond.GetCode() == CC_NE && cmpImmVal == 0)) { + /* if flag_reg of "cmp" is live later, we can't remove cmp insn. */ + auto &flagReg = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (FindRegLiveOut(flagReg, *prevInsn->GetBB())) { + return; + } + + auto &csetReg = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (andImmVal == 1) { + if (!RegOperand::IsSameRegNO(csetReg, prevInsn->GetOperand(kInsnSecondOpnd)) || + !RegOperand::IsSameRegNO(csetReg, prevPrevInsn->GetOperand(kInsnFirstOpnd))) { + return; + } + /* save the "and" insn only. */ + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); + } else { + if (!RegOperand::IsSameReg(csetReg, prevInsn->GetOperand(kInsnSecondOpnd)) || + !RegOperand::IsSameReg(csetReg, prevPrevInsn->GetOperand(kInsnFirstOpnd)) || + !RegOperand::IsSameReg(csetReg, prevPrevInsn->GetOperand(kInsnSecondOpnd))) { + return; + } + + /* andImmVal is n power of 2 */ + int n = logValueAtBase2(andImmVal); + if (n < 0) { + return; + } + + /* create ubfx insn */ + MOperator ubfxOp = (csetReg.GetSize() <= k32BitSize) ? 
MOP_wubfxrri5i5 : MOP_xubfxrri6i6; + auto ® = static_cast(csetReg); + CG *cg = cgFunc.GetCG(); + auto *aarch64CGFunc = static_cast(&cgFunc); + ImmOperand &bitPos = aarch64CGFunc->CreateImmOperand(n, k8BitSize, false); + ImmOperand &bitSize = aarch64CGFunc->CreateImmOperand(1, k8BitSize, false); + Insn &ubfxInsn = cg->BuildInstruction(ubfxOp, reg, reg, bitPos, bitSize); + bb.InsertInsnBefore(*prevPrevInsn, ubfxInsn); + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); + bb.RemoveInsn(*prevPrevInsn); + } + } +} + +void ZeroCmpBranchesAArch64::Run(BB &bb, Insn &insn) { + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (!insn.IsBranch() || insn.GetOperandSize() <= kInsnSecondOpnd || prevInsn == nullptr) { + return; + } + if (!insn.GetOperand(kInsnSecondOpnd).IsLabel()) { + return; + } + LabelOperand *label = &static_cast(insn.GetOperand(kInsnSecondOpnd)); + RegOperand *regOpnd = nullptr; + RegOperand *reg0 = nullptr; + RegOperand *reg1 = nullptr; + MOperator newOp = MOP_undef; + ImmOperand *imm = nullptr; + switch (prevInsn->GetMachineOpcode()) { + case MOP_wcmpri: + case MOP_xcmpri: { + regOpnd = &static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + imm = &static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + if (imm->GetValue() != 0) { + return; + } + if (insn.GetMachineOpcode() == MOP_bge) { + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbz : MOP_xtbz; + } else if (insn.GetMachineOpcode() == MOP_blt) { + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbnz : MOP_xtbnz; + } else { + return; + } + break; + } + case MOP_wcmprr: + case MOP_xcmprr: { + reg0 = &static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + reg1 = &static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + if (!reg0->IsZeroRegister() && !reg1->IsZeroRegister()) { + return; + } + switch (insn.GetMachineOpcode()) { + case MOP_bge: + if (reg1->IsZeroRegister()) { + regOpnd = &static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbz : MOP_xtbz; + } else { + return; + } + break; + case MOP_ble: + if (reg0->IsZeroRegister()) { + regOpnd = &static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbz : MOP_xtbz; + } else { + return; + } + break; + case MOP_blt: + if (reg1->IsZeroRegister()) { + regOpnd = &static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbnz : MOP_xtbnz; + } else { + return; + } + break; + case MOP_bgt: + if (reg0->IsZeroRegister()) { + regOpnd = &static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbnz : MOP_xtbnz; + } else { + return; + } + break; + default: + return; + } + break; + } + default: + return; + } + CG *cg = cgFunc.GetCG(); + auto aarch64CGFunc = static_cast(&cgFunc); + ImmOperand &bitp = aarch64CGFunc->CreateImmOperand( + (regOpnd->GetSize() <= k32BitSize) ? (k32BitSize - 1) : (k64BitSize - 1), k8BitSize, false); + bb.InsertInsnAfter( + insn, cg->BuildInstruction(newOp, *static_cast(regOpnd), bitp, *label)); + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); +} + +/* + * if there is define point of checkInsn->GetOperand(opndIdx) between startInsn and firstInsn + * return define insn. 
else return nullptr + */ +const Insn *CmpCsetAArch64::DefInsnOfOperandInBB(const Insn &startInsn, const Insn &checkInsn, int opndIdx) { + Insn *prevInsn = nullptr; + for (const Insn *insn = &startInsn; insn != nullptr; insn = prevInsn) { + prevInsn = insn->GetPreviousMachineInsn(); + if (!insn->IsMachineInstruction()) { + continue; + } + /* checkInsn.GetOperand(opndIdx) is thought modified conservatively */ + if (insn->IsCall()) { + return insn; + } + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + if (!regProp->IsDef()) { + continue; + } + /* Operand is base reg of Memory, defined by str */ + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + ASSERT(base != nullptr, "nullptr check"); + ASSERT(base->IsRegister(), "expects RegOperand"); + if (RegOperand::IsSameRegNO(*base, checkInsn.GetOperand(opndIdx)) && + memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi && + (memOpnd.IsPostIndexed() || memOpnd.IsPreIndexed())) { + return insn; + } + } else { + ASSERT(opnd.IsRegister(), "expects RegOperand"); + if (RegOperand::IsSameRegNO(checkInsn.GetOperand(opndIdx), opnd)) { + return insn; + } + } + } + } + return nullptr; +} + +bool CmpCsetAArch64::OpndDefByOneValidBit(const Insn &defInsn) { + MOperator defMop = defInsn.GetMachineOpcode(); + switch (defMop) { + case MOP_wcsetrc: + case MOP_xcsetrc: + return true; + case MOP_xmovri32: + case MOP_xmovri64: { + Operand &defOpnd = defInsn.GetOperand(kInsnSecondOpnd); + ASSERT(defOpnd.IsIntImmediate(), "expects ImmOperand"); + auto &defConst = static_cast(defOpnd); + int64 defConstValue = defConst.GetValue(); + return (defConstValue == 0 || defConstValue == 1); + } + case MOP_xmovrr: + case MOP_wmovrr: + return defInsn.GetOperand(kInsnSecondOpnd).IsZeroRegister(); + case MOP_wlsrrri5: + case MOP_xlsrrri6: { + Operand &opnd2 = defInsn.GetOperand(kInsnThirdOpnd); + ASSERT(opnd2.IsIntImmediate(), "expects ImmOperand"); + auto &opndImm = static_cast(opnd2); + int64 shiftBits = opndImm.GetValue(); + return ((defMop == MOP_wlsrrri5 && shiftBits == (k32BitSize - 1)) || + (defMop == MOP_xlsrrri6 && shiftBits == (k64BitSize - 1))); + } + default: + return false; + } +} + +/* + * help function for cmpcset optimize + * if all define points of used opnd in insn has only one valid bit,return true. 
+ * for cmp reg,#0(#1), that is checking for reg + */ +bool CmpCsetAArch64::CheckOpndDefPoints(Insn &checkInsn, int opndIdx) { + /* check current BB */ + const Insn *defInsn = DefInsnOfOperandInBB(checkInsn, checkInsn, opndIdx); + if (defInsn != nullptr) { + return OpndDefByOneValidBit(*defInsn); + } + /* check pred */ + for (auto predBB : checkInsn.GetBB()->GetPreds()) { + const Insn *tempInsn = nullptr; + if (predBB->GetLastInsn() != nullptr) { + tempInsn = DefInsnOfOperandInBB(*predBB->GetLastInsn(), checkInsn, opndIdx); + } + if (tempInsn == nullptr || !OpndDefByOneValidBit(*tempInsn)) { + return false; + } + } + return true; +} + +/* Check there is use point of rflag start from startInsn to current bb bottom */ +bool CmpCsetAArch64::FlagUsedLaterInCurBB(const BB &bb, Insn &startInsn) const { + if (&bb != startInsn.GetBB()) { + return false; + } + Insn *nextInsn = nullptr; + for (Insn *insn = &startInsn; insn != nullptr; insn = nextInsn) { + nextInsn = insn->GetNextMachineInsn(); + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + /* + * For condition operand, such as NE, EQ and so on, the register number should be + * same with RFLAG, we only need check the property of use/def. + */ + if (!opnd.IsConditionCode()) { + continue; + } + AArch64OpndProp *regProp = static_cast(md->operand[i]); + bool isUse = regProp->IsUse(); + if (isUse) { + return true; + } else { + ASSERT(regProp->IsDef(), "register should be redefined."); + return false; + } + } + } + return false; +} + +void CmpCsetAArch64::Run(BB &bb, Insn &insn) { + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + MOperator firstMop = insn.GetMachineOpcode(); + MOperator secondMop = nextInsn->GetMachineOpcode(); + if ((firstMop == MOP_wcmpri || firstMop == MOP_xcmpri) && + (secondMop == MOP_wcsetrc || secondMop == MOP_xcsetrc)) { + Operand &cmpFirstOpnd = insn.GetOperand(kInsnSecondOpnd); + /* get ImmOperand, must be 0 or 1 */ + Operand &cmpSecondOpnd = insn.GetOperand(kInsnThirdOpnd); + auto &cmpFlagReg = static_cast(insn.GetOperand(kInsnFirstOpnd)); + ASSERT(cmpSecondOpnd.IsIntImmediate(), "expects ImmOperand"); + auto &cmpConst = static_cast(cmpSecondOpnd); + int64 cmpConstVal = cmpConst.GetValue(); + Operand &csetFirstOpnd = nextInsn->GetOperand(kInsnFirstOpnd); + if ((cmpConstVal != 0 && cmpConstVal != 1) || !CheckOpndDefPoints(insn, 1) || + (nextInsn->GetNextMachineInsn() != nullptr && + FlagUsedLaterInCurBB(bb, *nextInsn->GetNextMachineInsn())) || + FindRegLiveOut(cmpFlagReg, *insn.GetBB())) { + return; + } + + Insn *csetInsn = nextInsn; + nextInsn = nextInsn->GetNextMachineInsn(); + auto &cond = static_cast(csetInsn->GetOperand(kInsnSecondOpnd)); + if ((cmpConstVal == 0 && cond.GetCode() == CC_NE) || (cmpConstVal == 1 && cond.GetCode() == CC_EQ)) { + if (RegOperand::IsSameRegNO(cmpFirstOpnd, csetFirstOpnd)) { + bb.RemoveInsn(insn); + bb.RemoveInsn(*csetInsn); + } else { + if (cmpFirstOpnd.GetSize() != csetFirstOpnd.GetSize()) { + return; + } + MOperator mopCode = (cmpFirstOpnd.GetSize() == k64BitSize) ? 
MOP_xmovrr : MOP_wmovrr; + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(mopCode, csetFirstOpnd, cmpFirstOpnd); + bb.ReplaceInsn(insn, newInsn); + bb.RemoveInsn(*csetInsn); + } + } else if ((cmpConstVal == 1 && cond.GetCode() == CC_NE) || (cmpConstVal == 0 && cond.GetCode() == CC_EQ)) { + MOperator mopCode = (cmpFirstOpnd.GetSize() == k64BitSize) ? MOP_xeorrri13 : MOP_weorrri12; + ImmOperand &one = static_cast(&cgFunc)->CreateImmOperand(1, k8BitSize, false); + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(mopCode, csetFirstOpnd, cmpFirstOpnd, one); + bb.ReplaceInsn(insn, newInsn); + bb.RemoveInsn(*csetInsn); + } + } +} + +/* + * help function for DeleteMovAfterCbzOrCbnz + * input: + * bb: the bb to be checked out + * checkCbz: to check out BB end with cbz or cbnz, if cbz, input true + * opnd: for MOV reg, #0, opnd indicate reg + * return: + * according to cbz, return true if insn is cbz or cbnz and the first operand of cbz(cbnz) is same as input + * operand + */ +bool DeleteMovAfterCbzOrCbnzAArch64::PredBBCheck(BB &bb, bool checkCbz, const Operand &opnd) const { + if (bb.GetKind() != BB::kBBIf) { + return false; + } + + Insn *condBr = cgcfg->FindLastCondBrInsn(bb); + ASSERT(condBr != nullptr, "condBr must be found"); + if (!cgcfg->IsCompareAndBranchInsn(*condBr)) { + return false; + } + MOperator mOp = condBr->GetMachineOpcode(); + if (checkCbz && mOp != MOP_wcbz && mOp != MOP_xcbz) { + return false; + } + if (!checkCbz && mOp != MOP_xcbnz && mOp != MOP_wcbnz) { + return false; + } + return RegOperand::IsSameRegNO(condBr->GetOperand(kInsnFirstOpnd), opnd); +} + +bool DeleteMovAfterCbzOrCbnzAArch64::OpndDefByMovZero(const Insn &insn) const { + MOperator defMop = insn.GetMachineOpcode(); + switch (defMop) { + case MOP_xmovri32: + case MOP_xmovri64: { + Operand &defOpnd = insn.GetOperand(kInsnSecondOpnd); + ASSERT(defOpnd.IsIntImmediate(), "expects ImmOperand"); + auto &defConst = static_cast(defOpnd); + int64 defConstValue = defConst.GetValue(); + if (defConstValue == 0) { + return true; + } + return false; + } + case MOP_xmovrr: + case MOP_wmovrr: { + Operand &secondOpnd = insn.GetOperand(kInsnSecondOpnd); + ASSERT(secondOpnd.IsRegister(), "expects RegOperand here"); + auto ®Opnd = static_cast(secondOpnd); + return regOpnd.IsZeroRegister(); + } + default: + return false; + } +} + +/* check whether predefine insn of first operand of test_insn is exist in current BB */ +bool DeleteMovAfterCbzOrCbnzAArch64::NoPreDefine(Insn &testInsn) const { + Insn *nextInsn = nullptr; + for (Insn *insn = testInsn.GetBB()->GetFirstInsn(); insn != nullptr && insn != &testInsn; insn = nextInsn) { + nextInsn = insn->GetNextMachineInsn(); + if (!insn->IsMachineInstruction()) { + continue; + } + ASSERT(!insn->IsCall(), "CG internal error, call insn should not be at the middle of the BB."); + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + if (!regProp->IsDef()) { + continue; + } + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + ASSERT(base != nullptr, "nullptr check"); + ASSERT(base->IsRegister(), "expects RegOperand"); + if (RegOperand::IsSameRegNO(*base, testInsn.GetOperand(kInsnFirstOpnd)) && + memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi && + (memOpnd.IsPostIndexed() || memOpnd.IsPreIndexed())) { + return 
false; + } + } else { + ASSERT(opnd.IsRegister(), "expects RegOperand"); + if (RegOperand::IsSameRegNO(testInsn.GetOperand(kInsnFirstOpnd), opnd)) { + return false; + } + } + } + } + return true; +} +void DeleteMovAfterCbzOrCbnzAArch64::ProcessBBHandle(BB *processBB, const BB &bb, const Insn &insn) { + FOR_BB_INSNS_SAFE(processInsn, processBB, nextProcessInsn) { + nextProcessInsn = processInsn->GetNextMachineInsn(); + if (!processInsn->IsMachineInstruction()) { + continue; + } + /* register may be a caller save register */ + if (processInsn->IsCall()) { + break; + } + if (!OpndDefByMovZero(*processInsn) || !NoPreDefine(*processInsn) || + !RegOperand::IsSameRegNO(processInsn->GetOperand(kInsnFirstOpnd), insn.GetOperand(kInsnFirstOpnd))) { + continue; + } + bool toDoOpt = true; + MOperator condBrMop = insn.GetMachineOpcode(); + /* process elseBB, other preds must be cbz */ + if (condBrMop == MOP_wcbnz || condBrMop == MOP_xcbnz) { + /* check out all preds of process_bb */ + for (auto *processBBPred : processBB->GetPreds()) { + if (processBBPred == &bb) { + continue; + } + if (!PredBBCheck(*processBBPred, true, processInsn->GetOperand(kInsnFirstOpnd))) { + toDoOpt = false; + break; + } + } + } else { + /* process ifBB, other preds can be cbz or cbnz(one at most) */ + for (auto processBBPred : processBB->GetPreds()) { + if (processBBPred == &bb) { + continue; + } + /* for cbnz pred, there is one at most */ + if (!PredBBCheck(*processBBPred, processBBPred != processBB->GetPrev(), + processInsn->GetOperand(kInsnFirstOpnd))) { + toDoOpt = false; + break; + } + } + } + if (!toDoOpt) { + continue; + } + processBB->RemoveInsn(*processInsn); + } +} + +/* ldr wn, [x1, wn, SXTW] + * add x2, wn, x2 + */ +bool ComplexMemOperandAddAArch64::IsExpandBaseOpnd(const Insn &insn, Insn &prevInsn) { + MOperator prevMop = prevInsn.GetMachineOpcode(); + if (prevMop >= MOP_wldrsb && prevMop <= MOP_xldr && + prevInsn.GetOperand(kInsnFirstOpnd).Equals(insn.GetOperand(kInsnSecondOpnd))) { + return true; + } + return false; +} + +void ComplexMemOperandAddAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + Insn *prevInsn = insn.GetPreviousMachineInsn(); + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_xaddrrr && thisMop != MOP_waddrrr) { + return; + } + MOperator nextMop = nextInsn->GetMachineOpcode(); + if (nextMop && + ((nextMop >= MOP_wldrsb && nextMop <= MOP_dldr) || (nextMop >= MOP_wstrb && nextMop <= MOP_dstr))) { + if (!IsMemOperandOptPattern(insn, *nextInsn)) { + return; + } + AArch64MemOperand *memOpnd = static_cast(nextInsn->GetMemOpnd()); + auto newBaseOpnd = static_cast(&insn.GetOperand(kInsnSecondOpnd)); + auto newIndexOpnd = static_cast(&insn.GetOperand(kInsnThirdOpnd)); + regno_t memBaseOpndRegNO = newBaseOpnd->GetRegisterNumber(); + if (newBaseOpnd->GetSize() <= k32BitSize && prevInsn != nullptr && IsExpandBaseOpnd(insn, *prevInsn)) { + newBaseOpnd = &aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(memBaseOpndRegNO), + k64BitSize, kRegTyInt); + } + if (newBaseOpnd->GetSize() != k64BitSize) { + return; + } + if (newIndexOpnd->GetSize() <= k32BitSize) { + AArch64MemOperand &newMemOpnd = + aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, memOpnd->GetSize(), newBaseOpnd, + newIndexOpnd, 0, false); + nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + } else { + AArch64MemOperand &newMemOpnd = + 
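// The ComplexMemOperandAdd fold above turns "add xd, xbase, xindex" followed by
// a load/store through xd into one access with the base+index addressing mode;
// it is plain address arithmetic, sketched below with hypothetical names
// (assumes <cstdint>).
static uint8_t LoadViaSeparateAdd(const uint8_t *mem, uint64_t base, uint64_t index) {
  uint64_t addr = base + index;   // add  x2, x0, x1
  return mem[addr];               // ldrb w3, [x2]
}
static uint8_t LoadViaBasePlusIndex(const uint8_t *mem, uint64_t base, uint64_t index) {
  return mem[base + index];       // ldrb w3, [x0, x1]  -- the add is folded away
}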
aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, memOpnd->GetSize(), newBaseOpnd, + newIndexOpnd, nullptr, nullptr); + nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + } + bb.RemoveInsn(insn); + } +} + +void DeleteMovAfterCbzOrCbnzAArch64::Run(BB &bb, Insn &insn) { + if (bb.GetKind() != BB::kBBIf) { + return; + } + if (&insn != cgcfg->FindLastCondBrInsn(bb)) { + return; + } + if (!cgcfg->IsCompareAndBranchInsn(insn)) { + return; + } + BB *processBB = nullptr; + if (bb.GetNext() == maplebe::CGCFG::GetTargetSuc(bb)) { + return; + } + + MOperator condBrMop = insn.GetMachineOpcode(); + if (condBrMop == MOP_wcbnz || condBrMop == MOP_xcbnz) { + processBB = bb.GetNext(); + } else { + processBB = maplebe::CGCFG::GetTargetSuc(bb); + } + + ASSERT(processBB != nullptr, "process_bb is null in DeleteMovAfterCbzOrCbnzAArch64::Run"); + ProcessBBHandle(processBB, bb, insn); +} + +MOperator OneHoleBranchesPreAArch64::FindNewMop(const BB &bb, const Insn &insn) const { + MOperator newOp = MOP_undef; + if (&insn != bb.GetLastInsn()) { + return newOp; + } + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_wcbz && thisMop != MOP_wcbnz && thisMop != MOP_xcbz && thisMop != MOP_xcbnz) { + return newOp; + } + switch (thisMop) { + case MOP_wcbz: + newOp = MOP_wtbnz; + break; + case MOP_wcbnz: + newOp = MOP_wtbz; + break; + case MOP_xcbz: + newOp = MOP_xtbnz; + break; + case MOP_xcbnz: + newOp = MOP_xtbz; + break; + default: + CHECK_FATAL(false, "can not touch here"); + break; + } + return newOp; +} + +void OneHoleBranchesPreAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + MOperator newOp = FindNewMop(bb, insn); + if (newOp == MOP_undef) { + return; + } + Insn *prevInsn = insn.GetPreviousMachineInsn(); + LabelOperand &label = static_cast(insn.GetOperand(kInsnSecondOpnd)); + if (prevInsn != nullptr && prevInsn->GetMachineOpcode() == MOP_xuxtb32 && + (static_cast(prevInsn->GetOperand(kInsnSecondOpnd)).GetValidBitsNum() <= k8BitSize || + static_cast(prevInsn->GetOperand(kInsnFirstOpnd)).GetValidBitsNum() <= k8BitSize)) { + if (&(prevInsn->GetOperand(kInsnFirstOpnd)) != &(insn.GetOperand(kInsnFirstOpnd))) { + return; + } + insn.SetOperand(kInsnFirstOpnd, prevInsn->GetOperand(kInsnSecondOpnd)); + bb.RemoveInsn(*prevInsn); + } + if (prevInsn != nullptr && + (prevInsn->GetMachineOpcode() == MOP_xeorrri13 || prevInsn->GetMachineOpcode() == MOP_weorrri12) && + static_cast(prevInsn->GetOperand(kInsnThirdOpnd)).GetValue() == 1) { + if (&(prevInsn->GetOperand(kInsnFirstOpnd)) != &(insn.GetOperand(kInsnFirstOpnd))) { + return; + } + Insn *prevPrevInsn = prevInsn->GetPreviousMachineInsn(); + if (prevPrevInsn == nullptr) { + return; + } + if (prevPrevInsn->GetMachineOpcode() != MOP_xuxtb32 || + static_cast(prevPrevInsn->GetOperand(kInsnSecondOpnd)).GetValidBitsNum() != 1) { + return; + } + if (&(prevPrevInsn->GetOperand(kInsnFirstOpnd)) != &(prevInsn->GetOperand(kInsnSecondOpnd))) { + return; + } + ImmOperand &oneHoleOpnd = aarch64CGFunc->CreateImmOperand(0, k8BitSize, false); + auto ®Operand = static_cast(prevPrevInsn->GetOperand(kInsnSecondOpnd)); + bb.InsertInsnAfter(insn, cgFunc.GetCG()->BuildInstruction(newOp, regOperand, oneHoleOpnd, label)); + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); + bb.RemoveInsn(*prevPrevInsn); + } +} + +bool LoadFloatPointAArch64::FindLoadFloatPoint(std::vector &optInsn, Insn &insn) { + MOperator mOp = insn.GetMachineOpcode(); + optInsn.clear(); + if (mOp != MOP_xmovzri16) { + return false; + } + 
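// Why FindNewMop above inverts cbz/cbnz: the matched "eor xd, xs, #1" flips
// bit 0 of a value whose valid bits are at most 1, so testing xd against zero
// is the same as testing bit 0 of xs with the opposite polarity. Minimal
// sketch (assumes <cassert>, <cstdint>):
static void IllustrateEorCbzToTbnz() {
  for (uint32_t y = 0; y <= 1; ++y) {
    assert(((y ^ 1u) == 0u) == (((y >> 0) & 1u) != 0u));  // cbz (y^1)  <=>  tbnz y,#0
    assert(((y ^ 1u) != 0u) == (((y >> 0) & 1u) == 0u));  // cbnz (y^1) <=>  tbz  y,#0
  }
}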
optInsn.emplace_back(&insn); + + Insn *insnMov2 = insn.GetNextMachineInsn(); + if (insnMov2 == nullptr) { + return false; + } + if (insnMov2->GetMachineOpcode() != MOP_xmovkri16) { + return false; + } + optInsn.emplace_back(insnMov2); + + Insn *insnMov3 = insnMov2->GetNextMachineInsn(); + if (insnMov3 == nullptr) { + return false; + } + if (insnMov3->GetMachineOpcode() != MOP_xmovkri16) { + return false; + } + optInsn.emplace_back(insnMov3); + + Insn *insnMov4 = insnMov3->GetNextMachineInsn(); + if (insnMov4 == nullptr) { + return false; + } + if (insnMov4->GetMachineOpcode() != MOP_xmovkri16) { + return false; + } + optInsn.emplace_back(insnMov4); + return true; +} + +bool LoadFloatPointAArch64::IsPatternMatch(const std::vector &optInsn) { + int insnNum = 0; + Insn *insn1 = optInsn[insnNum]; + Insn *insn2 = optInsn[++insnNum]; + Insn *insn3 = optInsn[++insnNum]; + Insn *insn4 = optInsn[++insnNum]; + if ((static_cast(insn1->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() != + static_cast(insn2->GetOperand(kInsnFirstOpnd)).GetRegisterNumber()) || + (static_cast(insn2->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() != + static_cast(insn3->GetOperand(kInsnFirstOpnd)).GetRegisterNumber()) || + (static_cast(insn3->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() != + static_cast(insn4->GetOperand(kInsnFirstOpnd)).GetRegisterNumber())) { + return false; + } + if ((static_cast(insn1->GetOperand(kInsnThirdOpnd)).GetShiftAmount() != 0) || + (static_cast(insn2->GetOperand(kInsnThirdOpnd)).GetShiftAmount() != + k16BitSize) || + (static_cast(insn3->GetOperand(kInsnThirdOpnd)).GetShiftAmount() != + k32BitSize) || + (static_cast(insn4->GetOperand(kInsnThirdOpnd)).GetShiftAmount() != + (k16BitSize + k32BitSize))) { + return false; + } + return true; +} + +void LoadFloatPointAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + /* logical shift left values in three optimized pattern */ + std::vector optInsn; + if (FindLoadFloatPoint(optInsn, insn) && IsPatternMatch(optInsn)) { + int insnNum = 0; + Insn *insn1 = optInsn[insnNum]; + Insn *insn2 = optInsn[++insnNum]; + Insn *insn3 = optInsn[++insnNum]; + Insn *insn4 = optInsn[++insnNum]; + auto &movConst1 = static_cast(insn1->GetOperand(kInsnSecondOpnd)); + auto &movConst2 = static_cast(insn2->GetOperand(kInsnSecondOpnd)); + auto &movConst3 = static_cast(insn3->GetOperand(kInsnSecondOpnd)); + auto &movConst4 = static_cast(insn4->GetOperand(kInsnSecondOpnd)); + /* movk/movz's immOpnd is 16-bit unsigned immediate */ + uint64 value = static_cast(movConst1.GetValue()) + + (static_cast(movConst2.GetValue()) << k16BitSize) + + (static_cast(movConst3.GetValue()) << k32BitSize) + + (static_cast(movConst4.GetValue()) << (k16BitSize + k32BitSize)); + + LabelIdx lableIdx = cgFunc.CreateLabel(); + LabelOperand &target = aarch64CGFunc->GetOrCreateLabelOperand(lableIdx); + cgFunc.InsertLabelMap(lableIdx, value); + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(MOP_xldli, insn4->GetOperand(kInsnFirstOpnd), + target); + bb.InsertInsnAfter(*insn4, newInsn); + bb.RemoveInsn(*insn1); + bb.RemoveInsn(*insn2); + bb.RemoveInsn(*insn3); + bb.RemoveInsn(*insn4); + } +} + +void ReplaceOrrToMovAArch64::Run(BB &bb, Insn &insn){ + Operand *opndOfOrr = nullptr; + ImmOperand *immOpnd = nullptr; + AArch64RegOperand *reg1 = nullptr; + AArch64RegOperand *reg2 = nullptr; + MOperator thisMop = insn.GetMachineOpcode(); + MOperator newMop = MOP_undef; + switch (thisMop) { + case MOP_wiorri12r: { /* opnd1 is Reg32 and opnd2 is immediate. 
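// How the four movz/movk 16-bit chunks matched by LoadFloatPointAArch64 combine
// into the single 64-bit literal that replaces them (mirrors the computation of
// `value` above); the example chunk values are illustrative, assumes <cassert>
// and <cstdint>:
static void IllustrateMovzMovkAssembly() {
  uint64_t c0 = 0x1234, c1 = 0x5678, c2 = 0x9abc, c3 = 0xdef0;  // four 16-bit immediates
  uint64_t value = c0 | (c1 << 16) | (c2 << 32) | (c3 << 48);
  assert(value == 0xdef09abc56781234ULL);
}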
*/ + opndOfOrr = &(insn.GetOperand(kInsnSecondOpnd)); + reg2 = &static_cast(insn.GetOperand(kInsnThirdOpnd)); + newMop = MOP_wmovrr; + break; + } + case MOP_wiorrri12: { /* opnd1 is reg32 and opnd3 is immediate. */ + opndOfOrr = &(insn.GetOperand(kInsnThirdOpnd)); + reg2 = &static_cast(insn.GetOperand(kInsnSecondOpnd)); + newMop = MOP_wmovrr; + break; + } + case MOP_xiorri13r: { /* opnd1 is Reg64 and opnd2 is immediate. */ + opndOfOrr = &(insn.GetOperand(kInsnSecondOpnd)); + reg2 = &static_cast(insn.GetOperand(kInsnThirdOpnd)); + newMop = MOP_xmovrr; + break; + } + case MOP_xiorrri13: { /* opnd1 is reg64 and opnd3 is immediate. */ + opndOfOrr = &(insn.GetOperand(kInsnThirdOpnd)); + reg2 = &static_cast(insn.GetOperand(kInsnSecondOpnd)); + newMop = MOP_xmovrr; + break; + } + default: + break; + } + ASSERT(opndOfOrr->IsIntImmediate(), "expects immediate operand"); + immOpnd = static_cast(opndOfOrr); + if (immOpnd->GetValue() == 0) { + reg1 = &static_cast(insn.GetOperand(kInsnFirstOpnd)); + bb.ReplaceInsn(insn, cgFunc.GetCG()->BuildInstruction(newMop, *reg1, *reg2)); + } +} + +void ReplaceCmpToCmnAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + MOperator thisMop = insn.GetMachineOpcode(); + MOperator nextMop = MOP_undef; + MOperator newMop = MOP_undef; + switch (thisMop) { + case MOP_xmovri32: { + nextMop = MOP_wcmprr; + newMop = MOP_wcmnri; + break; + } + case MOP_xmovri64: { + nextMop = MOP_xcmprr; + newMop = MOP_xcmnri; + break; + } + default: + break; + } + Operand *opnd1OfMov = &(insn.GetOperand(kInsnFirstOpnd)); + Operand *opnd2OfMov = &(insn.GetOperand(kInsnSecondOpnd)); + if (opnd2OfMov->IsIntImmediate()) { + ImmOperand *immOpnd = static_cast(opnd2OfMov); + int64 iVal = immOpnd->GetValue(); + if (kNegativeImmLowerLimit <= iVal && iVal < 0) { + Insn *nextInsn = insn.GetNextMachineInsn(); /* get the next insn to judge if it is a cmp instruction. */ + if (nextInsn != nullptr) { + if (nextInsn->GetMachineOpcode() == nextMop) { + Operand *opndCmp2 = &(nextInsn->GetOperand(kInsnSecondOpnd)); + Operand *opndCmp3 = &(nextInsn->GetOperand(kInsnThirdOpnd)); /* get the third operand of cmp */ + /* if the first operand of mov equals the third operand of cmp, match the pattern. 
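// The cmp-to-cmn rewrite here relies on cmn being "compare negative": comparing
// a value against a small negative immediate sets the flags from a - (-imm),
// which is the same value cmn computes as a + imm. Minimal sketch (assumes
// <cassert>, <cstdint>; imm is the positive magnitude, kept small as the
// kNegativeImmLowerLimit check above requires):
static void IllustrateCmpToCmn(int64_t a, int64_t imm) {
  assert(a - (-imm) == a + imm);  // cmp a, #-imm  and  cmn a, #imm  compute the same result
}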
*/ + if (opnd1OfMov == opndCmp3) { + ImmOperand &newOpnd = aarch64CGFunc->CreateImmOperand(iVal * (-1), immOpnd->GetSize(), false); + Operand ®Flag = nextInsn->GetOperand(kInsnFirstOpnd); + bb.ReplaceInsn(*nextInsn, cgFunc.GetCG()->BuildInstruction(MOperator(newMop), regFlag, + *opndCmp2, newOpnd)); + } + } + } + } + } +} + +void RemoveIncRefAArch64::Run(BB &bb, Insn &insn) { + MOperator mOp = insn.GetMachineOpcode(); + if (mOp != MOP_xbl) { + return; + } + auto &target = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (target.GetName() != "MCC_IncDecRef_NaiveRCFast") { + return; + } + Insn *insnMov2 = insn.GetPreviousMachineInsn(); + if (insnMov2 == nullptr) { + return; + } + MOperator mopMov2 = insnMov2->GetMachineOpcode(); + if (mopMov2 != MOP_xmovrr) { + return; + } + Insn *insnMov1 = insnMov2->GetPreviousMachineInsn(); + if (insnMov1 == nullptr) { + return; + } + MOperator mopMov1 = insnMov1->GetMachineOpcode(); + if (mopMov1 != MOP_xmovrr) { + return; + } + if (static_cast(insnMov1->GetOperand(kInsnSecondOpnd)).GetRegisterNumber() != + static_cast(insnMov2->GetOperand(kInsnSecondOpnd)).GetRegisterNumber()) { + return; + } + auto &mov2Dest = static_cast(insnMov2->GetOperand(kInsnFirstOpnd)); + auto &mov1Dest = static_cast(insnMov1->GetOperand(kInsnFirstOpnd)); + if (mov1Dest.IsVirtualRegister() || mov2Dest.IsVirtualRegister() || mov1Dest.GetRegisterNumber() != R0 || + mov2Dest.GetRegisterNumber() != R1) { + return; + } + bb.RemoveInsn(insn); + bb.RemoveInsn(*insnMov2); + bb.RemoveInsn(*insnMov1); + bb.SetKind(BB::kBBFallthru); +} + +bool LongIntCompareWithZAArch64::FindLondIntCmpWithZ(std::vector &optInsn, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + optInsn.clear(); + /* first */ + if (thisMop != MOP_xcmpri) { + return false; + } + optInsn.emplace_back(&insn); + + /* second */ + Insn *nextInsn1 = insn.GetNextMachineInsn(); + if (nextInsn1 == nullptr) { + return false; + } + MOperator nextMop1 = nextInsn1->GetMachineOpcode(); + if (nextMop1 != MOP_wcsinvrrrc) { + return false; + } + optInsn.emplace_back(nextInsn1); + + /* third */ + Insn *nextInsn2 = nextInsn1->GetNextMachineInsn(); + if (nextInsn2 == nullptr) { + return false; + } + MOperator nextMop2 = nextInsn2->GetMachineOpcode(); + if (nextMop2 != MOP_wcsincrrrc) { + return false; + } + optInsn.emplace_back(nextInsn2); + + /* forth */ + Insn *nextInsn3 = nextInsn2->GetNextMachineInsn(); + if (nextInsn3 == nullptr) { + return false; + } + MOperator nextMop3 = nextInsn3->GetMachineOpcode(); + if (nextMop3 != MOP_wcmpri) { + return false; + } + optInsn.emplace_back(nextInsn3); + return true; +} + +bool LongIntCompareWithZAArch64::IsPatternMatch(const std::vector &optInsn) { + constexpr int insnLen = 4; + if (optInsn.size() != insnLen) { + return false; + } + int insnNum = 0; + Insn *insn1 = optInsn[insnNum]; + Insn *insn2 = optInsn[++insnNum]; + Insn *insn3 = optInsn[++insnNum]; + Insn *insn4 = optInsn[++insnNum]; + ASSERT(insnNum == 3, " this specific case has three insns"); + if (insn2->GetOperand(kInsnSecondOpnd).IsZeroRegister() && insn2->GetOperand(kInsnThirdOpnd).IsZeroRegister() && + insn3->GetOperand(kInsnThirdOpnd).IsZeroRegister() && + &(insn3->GetOperand(kInsnFirstOpnd)) == &(insn3->GetOperand(kInsnSecondOpnd)) && + static_cast(insn2->GetOperand(kInsnFourthOpnd)).GetCode() == CC_GE && + static_cast(insn3->GetOperand(kInsnFourthOpnd)).GetCode() == CC_LE && + static_cast(insn1->GetOperand(kInsnThirdOpnd)).GetValue() == 0 && + static_cast(insn4->GetOperand(kInsnThirdOpnd)).GetValue() == 0) { + return 
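// The cmp/csinv/csinc sequence matched above materialises the three-way sign of
// a 64-bit value (-1, 0 or 1); comparing that result with zero orders exactly
// like comparing the original value with zero, which is why Run() below keeps
// only the first cmp. Sketch (assumes <cassert>, <cstdint>):
static void IllustrateLongCompareWithZero(int64_t x) {
  int32_t sign = (x < 0) ? -1 : ((x > 0) ? 1 : 0);  // cmp x,#0 ; csinv ; csinc
  assert((sign < 0) == (x < 0));
  assert((sign == 0) == (x == 0));
  assert((sign > 0) == (x > 0));
}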
true; + } + return false; +} + +void LongIntCompareWithZAArch64::Run(BB &bb, Insn &insn) { + std::vector optInsn; + /* found pattern */ + if (FindLondIntCmpWithZ(optInsn, insn) && IsPatternMatch(optInsn)) { + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(optInsn[0]->GetMachineOpcode(), + optInsn[0]->GetOperand(kInsnFirstOpnd), + optInsn[0]->GetOperand(kInsnSecondOpnd), + optInsn[0]->GetOperand(kInsnThirdOpnd)); + /* use newInsn to replace the third optInsn */ + bb.ReplaceInsn(*optInsn[3], newInsn); + optInsn.clear(); + } +} + +void ComplexMemOperandAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_xadrpl12) { + return; + } + MOperator nextMop = nextInsn->GetMachineOpcode(); + if (nextMop && + ((nextMop >= MOP_wldrsb && nextMop <= MOP_dldp) || (nextMop >= MOP_wstrb && nextMop <= MOP_dstp))) { + /* Check if base register of nextInsn and the dest operand of insn are identical. */ + AArch64MemOperand *memOpnd = static_cast(nextInsn->GetMemOpnd()); + ASSERT(memOpnd != nullptr, "memOpnd is null in AArch64Peep::ComplexMemOperandAArch64"); + + /* Only for AddrMode_B_OI addressing mode. */ + if (memOpnd->GetAddrMode() != AArch64MemOperand::kAddrModeBOi) { + return; + } + + /* Only for intact memory addressing. */ + if (!memOpnd->IsIntactIndexed()) { + return; + } + + auto ®Opnd = static_cast(insn.GetOperand(kInsnFirstOpnd)); + + /* Check if dest operand of insn is idential with base register of nextInsn. */ + if (memOpnd->GetBaseRegister() != ®Opnd) { + return; + } + + /* Check if x0 is used after ldr insn, and if it is in live-out. */ + if (IfOperandIsLiveAfterInsn(regOpnd, *nextInsn)) { + return; + } + + auto &stImmOpnd = static_cast(insn.GetOperand(kInsnThirdOpnd)); + AArch64OfstOperand &offOpnd = aarch64CGFunc->GetOrCreateOfstOpnd( + stImmOpnd.GetOffset() + memOpnd->GetOffsetImmediate()->GetOffsetValue(), k32BitSize); + auto &newBaseOpnd = static_cast(insn.GetOperand(kInsnSecondOpnd)); + AArch64MemOperand &newMemOpnd = + aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeLo12Li, memOpnd->GetSize(), + &newBaseOpnd, nullptr, &offOpnd, stImmOpnd.GetSymbol()); + + nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + bb.RemoveInsn(insn); + CHECK_FATAL(!CGOptions::IsLazyBinding() || cgFunc.GetCG()->IsLibcore(), + "this pattern can't be found in this phase"); + } +} + +void ComplexMemOperandPreAddAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_xaddrrr && thisMop != MOP_waddrrr) { + return; + } + MOperator nextMop = nextInsn->GetMachineOpcode(); + if (nextMop && + ((nextMop >= MOP_wldrsb && nextMop <= MOP_dldr) || (nextMop >= MOP_wstrb && nextMop <= MOP_dstr))) { + if (!IsMemOperandOptPattern(insn, *nextInsn)) { + return; + } + AArch64MemOperand *memOpnd = static_cast(nextInsn->GetMemOpnd()); + auto &newBaseOpnd = static_cast(insn.GetOperand(kInsnSecondOpnd)); + auto &newIndexOpnd = static_cast(insn.GetOperand(kInsnThirdOpnd)); + if (newBaseOpnd.GetSize() != k64BitSize) { + return; + } + if (newIndexOpnd.GetSize() <= k32BitSize) { + AArch64MemOperand &newMemOpnd = + aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, memOpnd->GetSize(), &newBaseOpnd, + &newIndexOpnd, 0, false); + 
nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + } else { + AArch64MemOperand &newMemOpnd = + aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, memOpnd->GetSize(), &newBaseOpnd, + &newIndexOpnd, nullptr, nullptr); + nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + } + bb.RemoveInsn(insn); + } +} + +bool ComplexMemOperandLSLAArch64::CheckShiftValid(const AArch64MemOperand &memOpnd, BitShiftOperand &lsl) const { + /* check if shift amount is valid */ + uint32 lslAmount = lsl.GetShiftAmount(); + constexpr uint8 twoShiftBits = 2; + constexpr uint8 threeShiftBits = 3; + if ((memOpnd.GetSize() == k32BitSize && (lsl.GetShiftAmount() != 0 && lslAmount != twoShiftBits)) || + (memOpnd.GetSize() == k64BitSize && (lsl.GetShiftAmount() != 0 && lslAmount != threeShiftBits))) { + return false; + } + if (memOpnd.GetSize() != (k8BitSize << lslAmount)) { + return false; + } + return true; +} + +void ComplexMemOperandLSLAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_xaddrrrs) { + return; + } + MOperator nextMop = nextInsn->GetMachineOpcode(); + if (nextMop && + ((nextMop >= MOP_wldrsb && nextMop <= MOP_dldr) || (nextMop >= MOP_wstrb && nextMop <= MOP_dstr))) { + /* Check if base register of nextInsn and the dest operand of insn are identical. */ + AArch64MemOperand *memOpnd = static_cast(nextInsn->GetMemOpnd()); + ASSERT(memOpnd != nullptr, "null ptr check"); + + /* Only for AddrMode_B_OI addressing mode. */ + if (memOpnd->GetAddrMode() != AArch64MemOperand::kAddrModeBOi) { + return; + } + + /* Only for immediate is 0. */ + if (memOpnd->GetOffsetImmediate()->GetOffsetValue() != 0) { + return; + } + + /* Only for intact memory addressing. */ + if (!memOpnd->IsIntactIndexed()) { + return; + } + + auto ®Opnd = static_cast(insn.GetOperand(kInsnFirstOpnd)); + + /* Check if dest operand of insn is idential with base register of nextInsn. */ + if (memOpnd->GetBaseRegister() != ®Opnd) { + return; + } + +#ifdef USE_32BIT_REF + if (nextInsn->IsAccessRefField() && nextInsn->GetOperand(kInsnFirstOpnd).GetSize() > k32BitSize) { + return; + } +#endif + + /* Check if x0 is used after ldr insn, and if it is in live-out. */ + if (IfOperandIsLiveAfterInsn(regOpnd, *nextInsn)) { + return; + } + auto &lsl = static_cast(insn.GetOperand(kInsnFourthOpnd)); + if (!CheckShiftValid(*memOpnd, lsl)) { + return; + } + auto &newBaseOpnd = static_cast(insn.GetOperand(kInsnSecondOpnd)); + auto &newIndexOpnd = static_cast(insn.GetOperand(kInsnThirdOpnd)); + AArch64MemOperand &newMemOpnd = + aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, memOpnd->GetSize(), &newBaseOpnd, + &newIndexOpnd, lsl.GetShiftAmount(), false); + nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + bb.RemoveInsn(insn); + } +} + + +void ComplexMemOperandLabelAArch64::Run(BB &bb, Insn &insn) { + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_xldli) { + return; + } + MOperator nextMop = nextInsn->GetMachineOpcode(); + if (nextMop != MOP_xvmovdr) { + return; + } + auto ®Opnd = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (regOpnd.GetRegisterNumber() != + static_cast(nextInsn->GetOperand(kInsnSecondOpnd)).GetRegisterNumber()) { + return; + } + + /* Check if x0 is used after ldr insn, and if it is in live-out. 
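// CheckShiftValid above only folds the shifted add when the LSL amount matches
// the access width (memOpnd.GetSize() == 8 << lsl), i.e. the scaled-register
// form [base, index, LSL #s] steps in whole elements. Sketch of the rule with a
// hypothetical helper (assumes <cstdint>):
static bool ShiftMatchesAccessWidth(uint32_t accessBits, uint32_t lsl) {
  return accessBits == (8u << lsl);  // 8-bit: LSL #0, 16-bit: LSL #1, 32-bit: LSL #2, 64-bit: LSL #3
}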
*/ + if (IfOperandIsLiveAfterInsn(regOpnd, *nextInsn)) { + return; + } + + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(MOP_dldli, nextInsn->GetOperand(kInsnFirstOpnd), + insn.GetOperand(kInsnSecondOpnd)); + bb.InsertInsnAfter(*nextInsn, newInsn); + bb.RemoveInsn(insn); + bb.RemoveInsn(*nextInsn); +} + +/* + * mov R0, vreg1 / R0 -> objDesignateInsn + * add vreg2, vreg1, #imm -> fieldDesignateInsn + * mov R1, vreg2 -> fieldParamDefInsn + * mov R2, vreg3 -> fieldValueDefInsn + */ +bool WriteFieldCallAArch64::WriteFieldCallOptPatternMatch(const Insn &writeFieldCallInsn, WriteRefFieldParam ¶m, + std::vector ¶mDefInsns) { + Insn *fieldValueDefInsn = writeFieldCallInsn.GetPreviousMachineInsn(); + if (fieldValueDefInsn == nullptr || fieldValueDefInsn->GetMachineOpcode() != MOP_xmovrr) { + return false; + } + Operand &fieldValueDefInsnDestOpnd = fieldValueDefInsn->GetOperand(kInsnFirstOpnd); + auto &fieldValueDefInsnDestReg = static_cast(fieldValueDefInsnDestOpnd); + if (fieldValueDefInsnDestReg.GetRegisterNumber() != R2) { + return false; + } + paramDefInsns.emplace_back(fieldValueDefInsn); + param.fieldValue = &(fieldValueDefInsn->GetOperand(kInsnSecondOpnd)); + Insn *fieldParamDefInsn = fieldValueDefInsn->GetPreviousMachineInsn(); + if (fieldParamDefInsn == nullptr || fieldParamDefInsn->GetMachineOpcode() != MOP_xmovrr) { + return false; + } + Operand &fieldParamDestOpnd = fieldParamDefInsn->GetOperand(kInsnFirstOpnd); + auto &fieldParamDestReg = static_cast(fieldParamDestOpnd); + if (fieldParamDestReg.GetRegisterNumber() != R1) { + return false; + } + paramDefInsns.emplace_back(fieldParamDefInsn); + Insn *fieldDesignateInsn = fieldParamDefInsn->GetPreviousMachineInsn(); + if (fieldDesignateInsn == nullptr || fieldDesignateInsn->GetMachineOpcode() != MOP_xaddrri12) { + return false; + } + Operand &fieldParamDefSrcOpnd = fieldParamDefInsn->GetOperand(kInsnSecondOpnd); + Operand &fieldDesignateDestOpnd = fieldDesignateInsn->GetOperand(kInsnFirstOpnd); + if (!RegOperand::IsSameReg(fieldParamDefSrcOpnd, fieldDesignateDestOpnd)) { + return false; + } + Operand &fieldDesignateBaseOpnd = fieldDesignateInsn->GetOperand(kInsnSecondOpnd); + param.fieldBaseOpnd = &(static_cast(fieldDesignateBaseOpnd)); + auto &immOpnd = static_cast(fieldDesignateInsn->GetOperand(kInsnThirdOpnd)); + param.fieldOffset = immOpnd.GetValue(); + paramDefInsns.emplace_back(fieldDesignateInsn); + Insn *objDesignateInsn = fieldDesignateInsn->GetPreviousMachineInsn(); + if (objDesignateInsn == nullptr || objDesignateInsn->GetMachineOpcode() != MOP_xmovrr) { + return false; + } + Operand &objDesignateDestOpnd = objDesignateInsn->GetOperand(kInsnFirstOpnd); + auto &objDesignateDestReg = static_cast(objDesignateDestOpnd); + if (objDesignateDestReg.GetRegisterNumber() != R0) { + return false; + } + Operand &objDesignateSrcOpnd = objDesignateInsn->GetOperand(kInsnSecondOpnd); + if (RegOperand::IsSameReg(objDesignateDestOpnd, objDesignateSrcOpnd) || + !RegOperand::IsSameReg(objDesignateSrcOpnd, fieldDesignateBaseOpnd)) { + return false; + } + param.objOpnd = &(objDesignateInsn->GetOperand(kInsnSecondOpnd)); + paramDefInsns.emplace_back(objDesignateInsn); + return true; +} + +bool WriteFieldCallAArch64::IsWriteRefFieldCallInsn(const Insn &insn) { + if (!insn.IsCall() || insn.IsIndirectCall()) { + return false; + } + Operand *targetOpnd = insn.GetCallTargetOperand(); + ASSERT(targetOpnd != nullptr, "targetOpnd must not be nullptr"); + if (!targetOpnd->IsFuncNameOpnd()) { + return false; + } + FuncNameOperand *target = 
static_cast(targetOpnd); + const MIRSymbol *funcSt = target->GetFunctionSymbol(); + ASSERT(funcSt->GetSKind() == kStFunc, "the kind of funcSt is unreasonable"); + const std::string &funcName = funcSt->GetName(); + return funcName == "MCC_WriteRefField" || funcName == "MCC_WriteVolatileField"; +} + +static bool MayThrowBetweenInsn(const Insn &prevCallInsn, const Insn &currCallInsn) { + for (Insn *insn = prevCallInsn.GetNext(); insn != nullptr && insn != &currCallInsn; insn = insn->GetNext()) { + if (insn->MayThrow()) { + return true; + } + } + return false; +} + +void WriteFieldCallAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + std::vector paramDefInsns; + Insn *nextInsn = insn.GetNextMachineInsn(); + if (!IsWriteRefFieldCallInsn(insn)) { + return; + } + if (!hasWriteFieldCall) { + if (!WriteFieldCallOptPatternMatch(insn, firstCallParam, paramDefInsns)) { + return; + } + prevCallInsn = &insn; + hasWriteFieldCall = true; + return; + } + WriteRefFieldParam currentCallParam; + if (!WriteFieldCallOptPatternMatch(insn, currentCallParam, paramDefInsns)) { + return; + } + if (prevCallInsn == nullptr || MayThrowBetweenInsn(*prevCallInsn, insn)) { + return; + } + if (firstCallParam.objOpnd == nullptr || currentCallParam.objOpnd == nullptr || + currentCallParam.fieldBaseOpnd == nullptr) { + return; + } + if (!RegOperand::IsSameReg(*firstCallParam.objOpnd, *currentCallParam.objOpnd)) { + return; + } + MemOperand &addr = + aarch64CGFunc->CreateMemOpnd(*currentCallParam.fieldBaseOpnd, currentCallParam.fieldOffset, k64BitSize); + Insn &strInsn = cgFunc.GetCG()->BuildInstruction(MOP_xstr, *currentCallParam.fieldValue, addr); + strInsn.AppendComment("store reference field"); + strInsn.MarkAsAccessRefField(true); + bb.InsertInsnAfter(insn, strInsn); + for (Insn *paramDefInsn : paramDefInsns) { + bb.RemoveInsn(*paramDefInsn); + } + bb.RemoveInsn(insn); + prevCallInsn = &strInsn; + nextInsn = strInsn.GetNextMachineInsn(); +} + +void RemoveDecRefAArch64::Run(BB &bb, Insn &insn) { + if (insn.GetMachineOpcode() != MOP_xbl) { + return; + } + auto &target = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (target.GetName() != "MCC_DecRef_NaiveRCFast") { + return; + } + Insn *insnMov = insn.GetPreviousMachineInsn(); + if (insnMov == nullptr) { + return; + } + MOperator mopMov = insnMov->GetMachineOpcode(); + if ((mopMov != MOP_xmovrr && mopMov != MOP_xmovri64) || + static_cast(insnMov->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() != R0) { + return; + } + Operand &srcOpndOfMov = insnMov->GetOperand(kInsnSecondOpnd); + if (!srcOpndOfMov.IsZeroRegister() && + !(srcOpndOfMov.IsImmediate() && static_cast(srcOpndOfMov).GetValue() == 0)) { + return; + } + bb.RemoveInsn(*insnMov); + bb.RemoveInsn(insn); + bb.SetKind(BB::kBBFallthru); +} + +/* + * Find 5 insn with certain OP code + * 1 : MOP_xaddrri12 + * 2 : MOP_waddrrr + * 3 : MOP_waddrri12 + * 4 : MOP_xsxtw64 + * 5 : MOP_xaddrrrs + */ +bool ComputationTreeAArch64::FindComputationTree(std::vector &optInsn, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + optInsn.clear(); + /* first */ + if (thisMop != MOP_xaddrri12) { + return false; + } + optInsn.emplace_back(&insn); + /* second */ + Insn *nextInsn1 = insn.GetNextMachineInsn(); + if (nextInsn1 == nullptr) { + return false; + } + MOperator nextMop1 = nextInsn1->GetMachineOpcode(); + if (nextMop1 != MOP_waddrrr) { + return false; + } + optInsn.emplace_back(nextInsn1); + /* third */ + Insn *nextInsn2 = nextInsn1->GetNextMachineInsn(); + if (nextInsn2 == 
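// Conceptual source shape of what WriteFieldCallAArch64::Run does (type name and
// field layout below are hypothetical): once one MCC_WriteRefField barrier call
// has been kept for an object, a later write to another field of the same
// object, with nothing that may throw in between, is emitted as a plain str
// tagged as a reference-field access instead of a second call.
struct IllustrativeObj { void *f1; void *f2; };
void IllustrateWriteFieldBatching(IllustrativeObj *obj, void *a, void *b) {
  obj->f1 = a;  // kept:      bl MCC_WriteRefField(obj, &obj->f1, a)
  obj->f2 = b;  // rewritten: str b, [obj, #offset_of_f2]   ("store reference field")
}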
nullptr) { + return false; + } + MOperator nextMop2 = nextInsn2->GetMachineOpcode(); + if (nextMop2 != MOP_waddrri12) { + return false; + } + optInsn.emplace_back(nextInsn2); + /* forth */ + Insn *nextInsn3 = nextInsn2->GetNextMachineInsn(); + if (nextInsn3 == nullptr) { + return false; + } + MOperator nextMop3 = nextInsn3->GetMachineOpcode(); + if (nextMop3 != MOP_xsxtw64) { + return false; + } + optInsn.emplace_back(nextInsn3); + /* fifth */ + Insn *nextInsn4 = nextInsn3->GetNextMachineInsn(); + if (nextInsn4 == nullptr) { + return false; + } + MOperator nextMop4 = nextInsn4->GetMachineOpcode(); + if (nextMop4 != MOP_xaddrrrs) { + return false; + } + optInsn.emplace_back(nextInsn4); + return true; +} + +/* + * Make sure the insn in opt_insn match the pattern as following: + * add x1, x1, #16 + * add w2, w10, w10 + * add w2, w2, #1 + * sxtw x2, w2 + * add x1, x1, x2, LSL #3 + * bl MCC_LoadRefField_NaiveRCFast + */ +bool ComputationTreeAArch64::IsPatternMatch(const std::vector &optInsn) const { + /* this speific pattern has exactly four insns */ + if (optInsn.size() <= 4) { + ERR(kLncErr, "access opt_insn failed"); + return false; + } + int insnNum = 0; + Insn *insn1 = optInsn[insnNum]; + Insn *insn2 = optInsn[++insnNum]; + Insn *insn3 = optInsn[++insnNum]; + Insn *insn4 = optInsn[++insnNum]; + Insn *insn5 = optInsn[++insnNum]; + ASSERT(insnNum == 4, "match pattern failed in AArch64Peep::PatternIsMatch"); + Insn *insn6 = insn5->GetNext(); + if (insn6 != nullptr && insn6->GetMachineOpcode() != MOP_xbl && insn6->GetMachineOpcode() != MOP_tail_call_opt_xbl) { + return false; + } + CHECK_FATAL(insn6 != nullptr, "Insn null ptr check"); + auto &funcNameOpnd = static_cast(insn6->GetOperand(kInsnFirstOpnd)); + if (&(insn1->GetOperand(kInsnFirstOpnd)) == &(insn5->GetOperand(kInsnSecondOpnd)) && + &(insn2->GetOperand(kInsnSecondOpnd)) == &(insn2->GetOperand(kInsnThirdOpnd)) && + &(insn2->GetOperand(kInsnFirstOpnd)) == &(insn3->GetOperand(kInsnSecondOpnd)) && + &(insn3->GetOperand(kInsnFirstOpnd)) == &(insn4->GetOperand(kInsnSecondOpnd)) && + &(insn4->GetOperand(kInsnFirstOpnd)) == &(insn5->GetOperand(kInsnThirdOpnd)) && + funcNameOpnd.GetName() == "MCC_LoadRefField_NaiveRCFast" && + static_cast(insn1->GetOperand(kInsnThirdOpnd)).GetValue() == k16BitSize && + static_cast(insn3->GetOperand(kInsnThirdOpnd)).GetValue() == 1) { + return true; + } + return false; +} + +void ComputationTreeAArch64::Run(BB &bb, Insn &insn) { + std::vector optInsn; + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + if (!insn.IsMachineInstruction()) { + return; + } + /* found pattern */ + if (FindComputationTree(optInsn, insn) && IsPatternMatch(optInsn)) { + Insn *sxtwInsn = optInsn[4]; // The pattern must has four insns. 
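// Arithmetic behind the rewrite below (LSL #3 case): the matched sequence adds
// 16 to the base and then ((2*n + 1) << 3), which equals adding (n << 4) + 24,
// i.e. one add with SXTW #4 plus one immediate add. Sketch (assumes <cassert>,
// <cstdint>):
static void IllustrateComputationTreeFold(int32_t n) {
  int64_t matched   = 16 + (static_cast<int64_t>(n) * 2 + 1) * 8;  // add #16 ; add w,w ; add #1 ; sxtw ; add ..., LSL #3
  int64_t rewritten = static_cast<int64_t>(n) * 16 + 24;           // add base, w, SXTW #4 ; add #24
  assert(matched == rewritten);
}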
+ CHECK_FATAL(sxtwInsn->GetOperand(kInsnFourthOpnd).GetKind() == Operand::kOpdShift, "should not happened"); + auto &lsl = static_cast(sxtwInsn->GetOperand(kInsnFourthOpnd)); + Operand *sxtw = nullptr; + Operand *imm = nullptr; + int32 lslBitLenth = 3; + uint32 lslShiftAmountCaseA = 3; + uint32 lslShiftAmountCaseB = 2; + int32 oriAddEnd = 16; + if (lsl.GetShiftAmount() == lslShiftAmountCaseA) { + sxtw = &aarch64CGFunc->CreateExtendShiftOperand(ExtendShiftOperand::kSXTW, + lslShiftAmountCaseA + 1, lslBitLenth); + imm = &aarch64CGFunc->CreateImmOperand(oriAddEnd + static_cast(1ULL << lslShiftAmountCaseA), + kMaxImmVal12Bits, true); + } else if (lsl.GetShiftAmount() == lslShiftAmountCaseB) { + sxtw = &aarch64CGFunc->CreateExtendShiftOperand(ExtendShiftOperand::kSXTW, + lslShiftAmountCaseB + 1, lslBitLenth); + imm = &aarch64CGFunc->CreateImmOperand(oriAddEnd + static_cast(1ULL << lslShiftAmountCaseB), + kMaxImmVal12Bits, true); + } + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(MOP_xxwaddrrre, + sxtwInsn->GetOperand(kInsnFirstOpnd), + optInsn[0]->GetOperand(kInsnSecondOpnd), + optInsn[1]->GetOperand(kInsnSecondOpnd), *sxtw); + bb.ReplaceInsn(*sxtwInsn, newInsn); + Insn &newAdd = + cgFunc.GetCG()->BuildInstruction(MOP_xaddrri12, sxtwInsn->GetOperand(kInsnFirstOpnd), + sxtwInsn->GetOperand(kInsnFirstOpnd), *imm); + (void)bb.InsertInsnAfter(newInsn, newAdd); + optInsn.clear(); + } +} + +/* + * We optimize the following pattern in this function: + * and x1, x1, #imm (is n power of 2) + * cbz/cbnz x1, .label + * => + * and x1, x1, #imm (is n power of 2) + * tbnz/tbz x1, #n, .label + */ +void OneHoleBranchesAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + if (&insn != bb.GetLastInsn()) { + return; + } + /* check cbz/cbnz insn */ + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_wcbz && thisMop != MOP_wcbnz && thisMop != MOP_xcbz && thisMop != MOP_xcbnz) { + return; + } + /* check and insn */ + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr) { + return; + } + MOperator prevMop = prevInsn->GetMachineOpcode(); + if (prevMop != MOP_wandrri12 && prevMop != MOP_xandrri13) { + return; + } + /* check opearnd of two insns */ + if (&(prevInsn->GetOperand(kInsnFirstOpnd)) != &(insn.GetOperand(kInsnFirstOpnd))) { + return; + } + auto &imm = static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + int n = logValueAtBase2(imm.GetValue()); + if (n < 0) { + return; + } + + /* replace insn */ + auto &label = static_cast(insn.GetOperand(kInsnSecondOpnd)); + MOperator newOp = MOP_undef; + switch (thisMop) { + case MOP_wcbz: + newOp = MOP_wtbz; + break; + case MOP_wcbnz: + newOp = MOP_wtbnz; + break; + case MOP_xcbz: + newOp = MOP_xtbz; + break; + case MOP_xcbnz: + newOp = MOP_xtbnz; + break; + default: + CHECK_FATAL(false, "can not touch here"); + break; + } + ImmOperand &oneHoleOpnd = aarch64CGFunc->CreateImmOperand(n, k8BitSize, false); + (void)bb.InsertInsnAfter(insn, cgFunc.GetCG()->BuildInstruction( + newOp, prevInsn->GetOperand(kInsnSecondOpnd), oneHoleOpnd, label)); + bb.RemoveInsn(insn); +} + +void ReplaceIncDecWithIncAArch64::Run(BB &bb, Insn &insn) { + if (insn.GetMachineOpcode() != MOP_xbl) { + return; + } + auto &target = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (target.GetName() != "MCC_IncDecRef_NaiveRCFast") { + return; + } + Insn *insnMov = insn.GetPreviousMachineInsn(); + if (insnMov == nullptr) { + return; + } + MOperator mopMov = insnMov->GetMachineOpcode(); + if (mopMov != MOP_xmovrr) { + return; + } + 
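// Bit identity behind OneHoleBranchesAArch64 above: after "and xt, xs, #(1<<n)"
// the whole result is zero exactly when bit n of xs is zero, so the cbz/cbnz on
// xt can test that single bit directly. Sketch (assumes <cassert>, <cstdint>):
static void IllustrateAndCbzToTbz(uint64_t x, unsigned n) {  // n < 64
  uint64_t masked = x & (1ULL << n);                  // and xt, xs, #(1<<n)
  assert((masked == 0) == (((x >> n) & 1ULL) == 0));  // cbz xt, L  <=>  tbz xs, #n, L
}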
if (static_cast(insnMov->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() != R1 || + !insnMov->GetOperand(kInsnSecondOpnd).IsZeroRegister()) { + return; + } + std::string funcName = "MCC_IncRef_NaiveRCFast"; + GStrIdx strIdx = GlobalTables::GetStrTable().GetStrIdxFromName(funcName); + MIRSymbol *st = GlobalTables::GetGsymTable().GetSymbolFromStrIdx(strIdx, true); + if (st == nullptr) { + LogInfo::MapleLogger() << "WARNING: Replace IncDec With Inc fail due to no MCC_IncRef_NaiveRCFast func\n"; + return; + } + bb.RemoveInsn(*insnMov); + target.SetFunctionSymbol(*st); +} + + +void AndCmpBranchesToTbzAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + if (&insn != bb.GetLastInsn()) { + return; + } + MOperator mopB = insn.GetMachineOpcode(); + if (mopB != MOP_beq && mopB != MOP_bne) { + return; + } + auto &label = static_cast(insn.GetOperand(kInsnSecondOpnd)); + /* get the instruction before bne/beq, expects its type is cmp. */ + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr) { + return; + } + MOperator prevMop = prevInsn->GetMachineOpcode(); + if (prevMop != MOP_wcmpri && prevMop != MOP_xcmpri) { + return; + } + + /* get the instruction before "cmp", expect its type is "and". */ + Insn *prevPrevInsn = prevInsn->GetPreviousMachineInsn(); + if (prevPrevInsn == nullptr) { + return; + } + MOperator mopAnd = prevPrevInsn->GetMachineOpcode(); + if (mopAnd != MOP_wandrri12 && mopAnd != MOP_xandrri13) { + return; + } + + /* + * check operand + * + * the real register of "cmp" and "and" must be the same. + */ + if (&(prevInsn->GetOperand(kInsnSecondOpnd)) != &(prevPrevInsn->GetOperand(kInsnFirstOpnd))) { + return; + } + + int opndIdx = 2; + if (!prevPrevInsn->GetOperand(opndIdx).IsIntImmediate() || !prevInsn->GetOperand(opndIdx).IsIntImmediate()) { + return; + } + auto &immAnd = static_cast(prevPrevInsn->GetOperand(opndIdx)); + auto &immCmp = static_cast(prevInsn->GetOperand(opndIdx)); + if (immCmp.GetValue() == 0) { + int n = logValueAtBase2(immAnd.GetValue()); + if (n < 0) { + return; + } + /* judge whether the flag_reg and "w0" is live later. */ + auto &flagReg = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + auto &cmpReg = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + if (FindRegLiveOut(flagReg, *prevInsn->GetBB()) || FindRegLiveOut(cmpReg, *prevInsn->GetBB())) { + return; + } + MOperator mopNew = MOP_undef; + switch (mopB) { + case MOP_beq: + if (mopAnd == MOP_wandrri12) { + mopNew = MOP_wtbz; + } else if (mopAnd == MOP_xandrri13) { + mopNew = MOP_xtbz; + } + break; + case MOP_bne: + if (mopAnd == MOP_wandrri12) { + mopNew = MOP_wtbnz; + } else if (mopAnd == MOP_xandrri13) { + mopNew = MOP_xtbnz; + } + break; + default: + CHECK_FATAL(false, "expects beq or bne insn"); + break; + } + ImmOperand &newImm = aarch64CGFunc->CreateImmOperand(n, k8BitSize, false); + (void)bb.InsertInsnAfter(insn, cgFunc.GetCG()->BuildInstruction(mopNew, + prevPrevInsn->GetOperand(kInsnSecondOpnd), newImm, label)); + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); + bb.RemoveInsn(*prevPrevInsn); + } else { + int n = logValueAtBase2(immAnd.GetValue()); + int m = logValueAtBase2(immCmp.GetValue()); + if (n < 0 || m < 0 || n != m) { + return; + } + /* judge whether the flag_reg and "w0" is live later. 
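// Second case handled by AndCmpBranchesToTbz here: "and xt, xs, #(1<<n) ;
// cmp xt, #(1<<n) ; beq L" holds exactly when bit n of xs is set, so beq becomes
// tbnz and bne becomes tbz. Sketch (assumes <cassert>, <cstdint>):
static void IllustrateAndCmpSameBit(uint64_t x, unsigned n) {  // n < 64
  uint64_t bit = 1ULL << n;
  assert(((x & bit) == bit) == (((x >> n) & 1ULL) != 0));  // beq -> tbnz xs,#n ; bne -> tbz xs,#n
}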
*/ + auto &flagReg = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + auto &cmpReg = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + if (FindRegLiveOut(flagReg, *prevInsn->GetBB()) || FindRegLiveOut(cmpReg, *prevInsn->GetBB())) { + return; + } + MOperator mopNew = MOP_undef; + switch (mopB) { + case MOP_beq: + if (mopAnd == MOP_wandrri12) { + mopNew = MOP_wtbnz; + } else if (mopAnd == MOP_xandrri13) { + mopNew = MOP_xtbnz; + } + break; + case MOP_bne: + if (mopAnd == MOP_wandrri12) { + mopNew = MOP_wtbz; + } else if (mopAnd == MOP_xandrri13) { + mopNew = MOP_xtbz; + } + break; + default: + CHECK_FATAL(false, "expects beq or bne insn"); + break; + } + ImmOperand &newImm = aarch64CGFunc->CreateImmOperand(n, k8BitSize, false); + (void)bb.InsertInsnAfter(insn, cgFunc.GetCG()->BuildInstruction(mopNew, + prevPrevInsn->GetOperand(kInsnSecondOpnd), newImm, label)); + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); + bb.RemoveInsn(*prevPrevInsn); + } +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_proepilog.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_proepilog.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5fdd370ffea7919ee88224298bd9a52b7a0d4008 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_proepilog.cpp @@ -0,0 +1,1747 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "riscv64_proepilog.h" +#include "cg_option.h" + +namespace maplebe { +using namespace maple; + +namespace { +const std::set kFrameWhiteListFunc { +#include "framewhitelist.def" +}; + +bool IsFuncNeedFrame(const std::string &funcName) { + return kFrameWhiteListFunc.find(funcName) != kFrameWhiteListFunc.end(); +} +constexpr uint32 k2BitSize = 2; +constexpr int32 kSoeChckOffset = 8192; + +enum RegsPushPop : uint8 { + kRegsPushOp, + kRegsPopOp +}; + +enum PushPopType : uint8 { + kPushPopSingle = 0, + kPushPopPair = 1 +}; + +MOperator pushPopOps[kRegsPopOp + 1][kRegTyFloat + 1][kPushPopPair + 1] = { + { /* push */ + { 0 /* undef */ }, + { /* kRegTyInt */ + MOP_xstr, /* single */ + MOP_xstp, /* pair */ + }, + { /* kRegTyFloat */ + MOP_dstr, /* single */ + MOP_dstp, /* pair */ + }, + }, + { /* pop */ + { 0 /* undef */ }, + { /* kRegTyInt */ + MOP_xldr, /* single */ + MOP_xldp, /* pair */ + }, + { /* kRegTyFloat */ + MOP_dldr, /* single */ + MOP_dldp, /* pair */ + }, + } +}; + +inline void AppendInstructionTo(Insn &insn, CGFunc &func) { + func.GetCurBB()->AppendInsn(insn); +} +} + +bool AArch64GenProEpilog::HasLoop() { + FOR_ALL_BB(bb, &cgFunc) { + if (bb->IsBackEdgeDest()) { + return true; + } + FOR_BB_INSNS_REV(insn, bb) { + if (!insn->IsMachineInstruction()) { + continue; + } + if (insn->HasLoop()) { + return true; + } + } + } + return false; +} + +/* + * Remove redundant mov and mark optimizable bl/blr insn in the BB. + * Return value: true if is empty bb, otherwise false. 
+ */ +bool AArch64GenProEpilog::OptimizeTailBB(BB &bb, std::set &callInsns) { + FOR_BB_INSNS_REV_SAFE(insn, &bb, prev_insn) { + if (!insn->IsMachineInstruction()) { + continue; + } + MOperator insnMop = insn->GetMachineOpcode(); + switch (insnMop) { + case MOP_wmovrr: + case MOP_xmovrr: { + CHECK_FATAL(insn->GetOperand(0).IsRegister(), "operand0 is not register"); + CHECK_FATAL(insn->GetOperand(1).IsRegister(), "operand1 is not register"); + auto ®1 = static_cast(insn->GetOperand(0)); + auto ®2 = static_cast(insn->GetOperand(1)); + + if (reg1.GetRegisterNumber() != R0 || reg2.GetRegisterNumber() != R0) { + return false; + } + + bb.RemoveInsn(*insn); + break; + } + case MOP_xbl: + case MOP_xblr: { + (void)callInsns.insert(insn); + return false; + } + default: + return false; + } + } + + return true; +} + +/* Recursively invoke this function until exitBB's precursor not exist. */ +void AArch64GenProEpilog::TailCallBBOpt(const BB &exitBB, std::set &callInsns) { + for (auto tmpBB : exitBB.GetPreds()) { + if (tmpBB->GetSuccs().size() != 1 || !tmpBB->GetEhSuccs().empty() || tmpBB->GetKind() != BB::kBBFallthru) { + continue; + } + + if (OptimizeTailBB(*tmpBB, callInsns)) { + TailCallBBOpt(*tmpBB, callInsns); + } + } +} + +/* + * If a function without callee-saved register, and end with a function call, + * then transfer bl/blr to b/br. + * Return value: true if function do not need Prologue/Epilogue. false otherwise. + */ +bool AArch64GenProEpilog::TailCallOpt() { + auto &aarchCGFunc = static_cast(cgFunc); + BB *exitBB = nullptr; + const MapleVector ®sToRestore = aarchCGFunc.GetCalleeSavedRegs(); + + size_t calleeSavedRegSize = 2; + CHECK_FATAL(regsToRestore.size() >= calleeSavedRegSize, "Forgot FP and LR ?"); + + if (regsToRestore.size() > calleeSavedRegSize || aarchCGFunc.HasStackLoadStore() || HasLoop() || + cgFunc.GetFunction().GetAttr(FUNCATTR_callersensitive) || IsFuncNeedFrame(cgFunc.GetName())) { + return false; + } + + size_t exitBBSize = cgFunc.GetExitBBsVec().size(); + CHECK_FATAL(exitBBSize == 1, "Should not be exist multiple exits."); + + if (exitBBSize == 0) { + if (cgFunc.GetLastBB()->GetPrev()->GetFirstStmt() == cgFunc.GetCleanupLabel() && + cgFunc.GetLastBB()->GetPrev()->GetPrev() != nullptr) { + exitBB = cgFunc.GetLastBB()->GetPrev()->GetPrev(); + } else { + exitBB = cgFunc.GetLastBB()->GetPrev(); + } + } else { + exitBB = cgFunc.GetExitBBsVec().front(); + } + + CHECK_FATAL(exitBB->GetFirstInsn() == nullptr, "exit bb should be empty."); + + /* Count how many call insns in the whole function. */ + uint32 nCount = 0; + bool hasGetStackClass = false; + + FOR_ALL_BB(bb, &cgFunc) { + FOR_BB_INSNS(insn, bb) { + if (insn->IsCall()) { + if (insn->GetMachineOpcode() == MOP_xbl) { + auto &target = static_cast(insn->GetOperand(0)); + if (IsFuncNeedFrame(target.GetName())) { + hasGetStackClass = true; + } + } + ++nCount; + } + } + } + + if ((nCount > 0 && cgFunc.GetFunction().GetAttr(FUNCATTR_interface)) || hasGetStackClass) { + return false; + } + + std::set callInsns; + TailCallBBOpt(*exitBB, callInsns); + + if (nCount != callInsns.size()) { + return false; + } + /* Replace all of the call insns. 
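// Source-level shape of what TailCallOpt enables (illustrative names only): a
// function that keeps no callee-saved registers beyond FP/LR, touches no stack
// slots and ends in a call can branch to its callee instead of calling it, so
// no prologue or epilogue is needed and the bl is retagged as a tail call.
extern int IllustrativeCallee(int);
int IllustrativeCaller(int x) {
  return IllustrativeCallee(x + 1);  // bl IllustrativeCallee  ->  b IllustrativeCallee (MOP_tail_call_opt_xbl)
}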
*/ + for (auto callInsn : callInsns) { + MOperator insnMop = callInsn->GetMachineOpcode(); + switch (insnMop) { + case MOP_xbl: { + callInsn->SetMOP(MOP_tail_call_opt_xbl); + break; + } + case MOP_xblr: { + callInsn->SetMOP(MOP_tail_call_opt_xblr); + break; + } + default: + ASSERT(false, "Internal error."); + break; + } + } + return true; +} + +void AArch64GenProEpilog::GenStackGuard(BB &bb) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + if (currCG->AddStackGuard()) { + BB *formerCurBB = cgFunc.GetCurBB(); + aarchCGFunc.GetDummyBB()->ClearInsns(); + aarchCGFunc.GetDummyBB()->SetIsProEpilog(true); + cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB()); + + MIRSymbol *stkGuardSym = GlobalTables::GetGsymTable().GetSymbolFromStrIdx( + GlobalTables::GetStrTable().GetStrIdxFromName(std::string("__stack_chk_guard"))); + StImmOperand &stOpnd = aarchCGFunc.CreateStImmOperand(*stkGuardSym, 0, 0); + AArch64RegOperand &stAddrOpnd = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R9, kSizeOfPtr * kBitsPerByte, kRegTyInt); + aarchCGFunc.SelectAddrof(stAddrOpnd, stOpnd); + + AArch64MemOperand *guardMemOp = + aarchCGFunc.GetMemoryPool()->New(AArch64MemOperand::kAddrModeBOi, kSizeOfPtr * kBitsPerByte, + stAddrOpnd, nullptr, &aarchCGFunc.GetOrCreateOfstOpnd(0, k32BitSize), stkGuardSym); + MOperator mOp = aarchCGFunc.PickLdInsn(k64BitSize, PTY_u64); + Insn &insn = currCG->BuildInstruction(mOp, stAddrOpnd, *guardMemOp); + insn.SetDoNotRemove(true); + cgFunc.GetCurBB()->AppendInsn(insn); + + int vArea = 0; + if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) { + AArch64MemLayout *ml = static_cast(cgFunc.GetMemlayout()); + if (ml->GetSizeOfGRSaveArea() > 0) { + vArea += RoundUp(ml->GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment); + } + if (ml->GetSizeOfVRSaveArea() > 0) { + vArea += RoundUp(ml->GetSizeOfVRSaveArea(), kAarch64StackPtrAlignment); + } + } + + int32 stkSize = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize() - + static_cast(cgFunc.GetMemlayout())->SizeOfArgsToStackPass() - vArea; + AArch64MemOperand *downStk = + aarchCGFunc.GetMemoryPool()->New(RFP, stkSize - kOffset8MemPos - vArea, + kSizeOfPtr * kBitsPerByte); + if (downStk->GetMemVaryType() == kNotVary && + aarchCGFunc.IsImmediateOffsetOutOfRange(*downStk, k64BitSize)) { + downStk = &aarchCGFunc.SplitOffsetWithAddInstruction(*downStk, k64BitSize, R10); + } + mOp = aarchCGFunc.PickStInsn(kSizeOfPtr * kBitsPerByte, PTY_u64); + Insn &tmpInsn = currCG->BuildInstruction(mOp, stAddrOpnd, *downStk); + tmpInsn.SetDoNotRemove(true); + cgFunc.GetCurBB()->AppendInsn(tmpInsn); + + bb.InsertAtBeginning(*aarchCGFunc.GetDummyBB()); + aarchCGFunc.GetDummyBB()->SetIsProEpilog(false); + cgFunc.SetCurBB(*formerCurBB); + } +} + +BB &AArch64GenProEpilog::GenStackGuardCheckInsn(BB &bb) { + CG *currCG = cgFunc.GetCG(); + if (!currCG->AddStackGuard()) { + return bb; + } + + BB *formerCurBB = cgFunc.GetCurBB(); + cgFunc.GetDummyBB()->ClearInsns(); + cgFunc.SetCurBB(*(cgFunc.GetDummyBB())); + auto &aarchCGFunc = static_cast(cgFunc); + + const MIRSymbol *stkGuardSym = GlobalTables::GetGsymTable().GetSymbolFromStrIdx( + GlobalTables::GetStrTable().GetStrIdxFromName(std::string("__stack_chk_guard"))); + StImmOperand &stOpnd = aarchCGFunc.CreateStImmOperand(*stkGuardSym, 0, 0); + AArch64RegOperand &stAddrOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R9, kSizeOfPtr * kBitsPerByte, + kRegTyInt); + aarchCGFunc.SelectAddrof(stAddrOpnd, stOpnd); + + AArch64MemOperand *guardMemOp = + 
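// Conceptual view of the code emitted by GenStackGuard above and
// GenStackGuardCheckInsn below; __stack_chk_guard and __stack_chk_fail are the
// usual libc symbols, while the declared type and the slot layout here are an
// illustrative assumption (assumes <cstdint>).
extern "C" uintptr_t __stack_chk_guard;
extern "C" void __stack_chk_fail(void);
static void IllustrateStackGuard(void) {
  volatile uintptr_t slot = __stack_chk_guard;  // prologue: load the guard, store it into the frame
  // ... function body ...
  if (slot != __stack_chk_guard) {              // epilogue: reload both, eor, branch over the call when equal
    __stack_chk_fail();
  }
}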
cgFunc.GetMemoryPool()->New(AArch64MemOperand::kAddrModeBOi, + kSizeOfPtr * kBitsPerByte, stAddrOpnd, nullptr, + &aarchCGFunc.GetOrCreateOfstOpnd(0, k32BitSize), + stkGuardSym); + MOperator mOp = aarchCGFunc.PickLdInsn(k64BitSize, PTY_u64); + Insn &insn = currCG->BuildInstruction(mOp, stAddrOpnd, *guardMemOp); + insn.SetDoNotRemove(true); + cgFunc.GetCurBB()->AppendInsn(insn); + + int vArea = 0; + if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) { + AArch64MemLayout *ml = static_cast(cgFunc.GetMemlayout()); + if (ml->GetSizeOfGRSaveArea() > 0) { + vArea += RoundUp(ml->GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment); + } + if (ml->GetSizeOfVRSaveArea() > 0) { + vArea += RoundUp(ml->GetSizeOfVRSaveArea(), kAarch64StackPtrAlignment); + } + } + + AArch64RegOperand &checkOp = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R10, kSizeOfPtr * kBitsPerByte, kRegTyInt); + int32 stkSize = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize() - + static_cast(cgFunc.GetMemlayout())->SizeOfArgsToStackPass() - vArea; + AArch64MemOperand *downStk = + aarchCGFunc.GetMemoryPool()->New(RFP, stkSize - kOffset8MemPos - vArea, + kSizeOfPtr * kBitsPerByte); + if (downStk->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*downStk, k64BitSize)) { + downStk = &aarchCGFunc.SplitOffsetWithAddInstruction(*static_cast(downStk), k64BitSize, R10); + } + mOp = aarchCGFunc.PickLdInsn(kSizeOfPtr * kBitsPerByte, PTY_u64); + Insn &newInsn = currCG->BuildInstruction(mOp, checkOp, *downStk); + newInsn.SetDoNotRemove(true); + cgFunc.GetCurBB()->AppendInsn(newInsn); + + cgFunc.SelectBxor(stAddrOpnd, stAddrOpnd, checkOp, PTY_u64); + LabelIdx failLable = aarchCGFunc.CreateLabel(); + aarchCGFunc.SelectCondGoto(aarchCGFunc.GetOrCreateLabelOperand(failLable), OP_brtrue, OP_eq, + stAddrOpnd, aarchCGFunc.CreateImmOperand(0, k64BitSize, false), PTY_u64); + + MIRSymbol *failFunc = GlobalTables::GetGsymTable().GetSymbolFromStrIdx( + GlobalTables::GetStrTable().GetStrIdxFromName(std::string("__stack_chk_fail"))); + AArch64ListOperand *srcOpnds = + cgFunc.GetMemoryPool()->New(*cgFunc.GetFuncScopeAllocator()); + Insn &callInsn = aarchCGFunc.AppendCall(*failFunc, *srcOpnds); + callInsn.SetDoNotRemove(true); + + bb.AppendBBInsns(*(cgFunc.GetCurBB())); + + BB *newBB = cgFunc.CreateNewBB(failLable, bb.IsUnreachable(), bb.GetKind(), bb.GetFrequency()); + bb.AppendBB(*newBB); + if (cgFunc.GetLastBB() == &bb) { + cgFunc.SetLastBB(*newBB); + } + bb.SetKind(BB::kBBFallthru); + bb.PushBackSuccs(*newBB); + newBB->PushBackPreds(bb); + + cgFunc.SetCurBB(*formerCurBB); + return *newBB; +} + +/* + * The following functions are for pattern matching for fast path + * where function has early return of spedified pattern load/test/return + */ +void AArch64GenProEpilog::ReplaceMachedOperand(Insn &orig, Insn &target, const RegOperand &matchedOpnd, + bool isFirstDst) { + auto &aarchCGFunc = static_cast(cgFunc); + for (uint32 i = 0; i < target.GetOpndNum(); ++i) { + Operand *src = target.GetOpnd(i); + CHECK_FATAL(src != nullptr, "src is null in ReplaceMachedOperand"); + if (src->IsRegister()) { + RegOperand *reg = static_cast(src); + if (reg != &matchedOpnd) { + continue; + } + if (isFirstDst) { + Operand *origSrc = orig.GetOpnd(0); + RegOperand *origPhys = static_cast(origSrc); + CHECK_FATAL(origPhys != nullptr, "pointer is null"); + regno_t regNO = origPhys->GetRegisterNumber(); + RegOperand &phys = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(regNO), + matchedOpnd.GetSize(), 
kRegTyInt); + target.SetOpnd(i, phys); + } else { + /* Replace the operand with the src of inst */ + RegOperand &phys = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(R16), + reg->GetSize(), kRegTyInt); + target.SetOpnd(i, phys); + } + return; + } + if (src->IsMemoryAccessOperand()) { + MemOperand *memOpnd = static_cast(src); + Operand *base = memOpnd->GetBaseRegister(); + Operand *offset = memOpnd->GetIndexRegister(); + if (base != nullptr && base->IsRegister()) { + RegOperand *reg = static_cast(base); + if (reg != &matchedOpnd) { + continue; + } + if (isFirstDst) { + Operand *origSrc = orig.GetOpnd(0); + RegOperand *origPhys = static_cast(origSrc); + CHECK_FATAL(origPhys != nullptr, "orig_phys cast failed"); + regno_t regNO = origPhys->GetRegisterNumber(); + RegOperand &phys = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(regNO), + matchedOpnd.GetSize(), kRegTyInt); + memOpnd->SetBaseRegister(phys); + } else { + /* Replace the operand with the src of inst */ + RegOperand &phys = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(R16), base->GetSize(), kRegTyInt); + memOpnd->SetBaseRegister(phys); + } + return; + } + if (offset != nullptr && offset->IsRegister()) { + RegOperand *reg = static_cast(offset); + if (reg != &matchedOpnd) { + continue; + } + if (isFirstDst) { + Operand *origSrc = orig.GetOpnd(0); + RegOperand *origPhys = static_cast(origSrc); + CHECK_FATAL(origPhys != nullptr, "orig_phys is nullptr in ReplaceMachedOperand"); + regno_t regNO = origPhys->GetRegisterNumber(); + RegOperand &phys = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(regNO), + matchedOpnd.GetSize(), kRegTyInt); + memOpnd->SetIndexRegister(phys); + } else { + /* Replace the operand with the src of inst */ + RegOperand &phys = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(R16), + offset->GetSize(), kRegTyInt); + memOpnd->SetIndexRegister(phys); + } + return; + } + } + } + if (!isFirstDst) { + RegOperand &phys = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(R16), + matchedOpnd.GetSize(), kRegTyInt); + orig.SetResult(0, phys); + } +} + +bool AArch64GenProEpilog::BackwardFindDependency(BB &ifbb, RegOperand &targetOpnd, Insn *&load, + Insn *&mov, Insn *&depMov, std::list &list) { + load = nullptr; + mov = nullptr; + depMov = nullptr; + BB *pred = &ifbb; + /* + * Pattern match, (*) instruction are moved down below branch. + * mov reg1, R0 mov reg1, R0 * + * ld Rx , [reg1, const] ld R16 , [R0, const] + * mov reg2, Rx => mov reg2, R16 <- this may exist * + * mov Rx , R16 <- replicate * + * cbr Rx, label cbr R16, label + * + * R16 is used because it is used as spill register. + * Need to modify if different register allcoation mechanism is used. 
+ */ + do { + FOR_BB_INSNS_REV(insn, pred) { + if (insn == ifbb.GetLastInsn()) { + continue; + } + if (insn->IsImmaterialInsn()) { + continue; + } + if (!insn->IsMachineInstruction()) { + continue; + } + + bool found = false; /* allowing for only one src to be register */ + for (uint32 r = 0; r < insn->GetResultNum(); ++r) { + Operand *dst = insn->GetResult(r); + CHECK_FATAL(dst != nullptr, "pointer is null"); + if (!dst->IsRegister()) { + continue; + } + RegOperand *regOpnd = static_cast(dst); + if (regOpnd != &targetOpnd) { + continue; + } + if (load != nullptr) { + if (mov != nullptr) { + return false; + } + MOperator opCode = insn->GetMachineOpcode(); + if (opCode != MOP_xmovrr) { + return false; + } + Operand *mvSrc = insn->GetOpnd(0); + RegOperand *mvRegSrc = static_cast(mvSrc); + CHECK_FATAL(mvRegSrc != nullptr, "mv_regsrc cast failed"); + regno_t mvReg = mvRegSrc->GetRegisterNumber(); + /* make it very specific for now */ + if (mvReg != R0) { + return false; + } + Operand *mvDst = insn->GetResult(0); + RegOperand *mvRegDst = static_cast(mvDst); + CHECK_FATAL(mvRegDst != nullptr, "mv_regdst cast failed"); + mvReg = mvRegDst->GetRegisterNumber(); + if (mvReg != R20) { + return false; + } + mov = insn; + } + /* Found def, continue dep chain with src */ + for (uint32 s = 0; s < insn->GetOpndNum(); ++s) { + Operand *src = insn->GetOpnd(s); + CHECK_FATAL(src != nullptr, "src is nullptr in BackwardFindDependency"); + if (src->IsRegister()) { + if (found) { + return false; + } + RegOperand *preg = static_cast(src); + targetOpnd = *preg; + if (!preg->IsPhysicalRegister() || insn->GetMachineOpcode() != MOP_xmovrr) { + return false; + } + /* + * Skipping the start of the dependency chain because + * the the parameter reg will be propagated leaving + * the mov instruction alone to be relocated down + * to the cold path. + */ + found = false; + } else if (src->IsMemoryAccessOperand()) { + MemOperand *memOpnd = static_cast(src); + Operand *base = memOpnd->GetBaseRegister(); + Operand *offset = memOpnd->GetIndexRegister(); + if (base != nullptr && base->IsRegister()) { + if (found) { + return false; + } + load = insn; + targetOpnd = *(static_cast(base)); + found = true; + Operand *ldDst = insn->GetResult(0); + RegOperand *ldRdst = static_cast(ldDst); + CHECK_FATAL(ldRdst != nullptr, "ld_rdst is nullptr in BackwardFindDependency"); + if (ldRdst->GetRegisterNumber() != R1) { + return false; /* hard code for now. */ + } + /* Make sure instruction depending on load is mov and cond br */ + for (Insn *ni = insn->GetNext(); ni != nullptr; ni = ni->GetNext()) { + if (ni->GetMachineOpcode() == MOP_xmovrr || ni->GetMachineOpcode() == MOP_wmovrr) { + Operand *dep = ni->GetOpnd(0); + RegOperand *rdep = static_cast(dep); + if (rdep == ldRdst) { + if (depMov != nullptr) { + return false; + } + depMov = ni; + } + } + } + } + if (offset != nullptr && offset->IsRegister()) { + return false; + } + } + } + } + if (!found) { + list.push_back(insn); + } + } + if (pred->GetPreds().empty()) { + break; + } + pred = pred->GetPreds().front(); + } while (pred != nullptr); + + return true; +} + +void AArch64GenProEpilog::ForwardPropagateAndRename(Insn &mov, Insn &load, const BB &terminateBB) { + /* + * This is specialized function to work with IsolateFastPath(). + * The control flow and instruction pattern must be recognized. 
+ */ + Insn *insn = &mov; + bool isFirstDst = true; + /* process mov and load two instructions */ + for (int32 i = 0; i < 2; ++i) { + /* Finish the bb the mov is in */ + for (Insn *target = insn->GetNext(); target != nullptr; target = target->GetNext()) { + if (target->IsImmaterialInsn()) { + continue; + } + if (!target->IsMachineInstruction()) { + continue; + } + Operand *dst = insn->GetResult(0); + RegOperand *rdst = static_cast(dst); + CHECK_FATAL(rdst != nullptr, "rdst is nullptr in ForwardPropagateAndRename"); + ReplaceMachedOperand(*insn, *target, *rdst, isFirstDst); + } + CHECK_FATAL(!insn->GetBB()->GetSuccs().empty(), "null succs check!"); + BB *bb = insn->GetBB()->GetSuccs().front(); + while (1) { + FOR_BB_INSNS(target, bb) { + if (!target->IsMachineInstruction()) { + continue; + } + Operand *dst = insn->GetResult(0); + RegOperand *rdst = static_cast(dst); + CHECK_FATAL(rdst != nullptr, "rdst is nullptr in ForwardPropagateAndRename"); + ReplaceMachedOperand(*insn, *target, *rdst, isFirstDst); + } + if (bb == &terminateBB) { + break; + } + CHECK_FATAL(!bb->GetSuccs().empty(), "null succs check!"); + bb = bb->GetSuccs().front(); + } + insn = &load; + isFirstDst = false; + } +} + +BB *AArch64GenProEpilog::IsolateFastPath(BB &bb) { + /* + * Detect "if (cond) return" fast path, and move extra instructions + * to the slow path. + * Must match the following block structure. BB1 can be a series of + * single-pred/single-succ blocks. + * BB1 ops1 cmp-br to BB3 BB1 cmp-br to BB3 + * BB2 ops2 br to retBB ==> BB2 ret + * BB3 slow path BB3 ops1 ops2 + * BB3 will be used to generate prolog stuff. + */ + if (bb.GetPrev() != nullptr) { + return nullptr; + } + BB *ifBB = nullptr; + BB *returnBB = nullptr; + BB *coldBB = nullptr; + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + { + BB &curBB = bb; + /* Look for straight line code */ + while (1) { + if (curBB.GetEhSuccs().empty()) { + return nullptr; + } + if (curBB.GetSuccs().size() == 1) { + if (curBB.HasCall()) { + return nullptr; + } + BB *succ = curBB.GetSuccs().front(); + if (succ->GetPreds().size() != 1 || !succ->GetEhPreds().empty()) { + return nullptr; + } + curBB = *succ; + } else if (curBB.GetKind() == BB::kBBIf) { + ifBB = &curBB; + break; + } else { + return nullptr; + } + } + } + /* targets of if bb can only be reached by if bb */ + { + CHECK_FATAL(!ifBB->GetSuccs().empty(), "null succs check!"); + BB *first = ifBB->GetSuccs().front(); + BB *second = ifBB->GetSuccs().back(); + if (first->GetPreds().size() != 1 || !first->GetEhPreds().empty()) { + return nullptr; + } + if (second->GetPreds().size() != 1 || !second->GetEhPreds().empty()) { + return nullptr; + } + + /* One target of the if bb jumps to a return bb */ + CHECK_FATAL(!first->GetSuccs().empty(), "null succs check!"); + if (first->GetKind() != BB::kBBGoto || first->GetSuccs().front()->GetKind() != BB::kBBReturn) { + return nullptr; + } + if (second->GetSuccs().empty()) { + return nullptr; + } + returnBB = first; + coldBB = second; + } + /* + * The control flow matches at this point. + * Make sure the hot bb contains atmost a + * 'mov x0, value' and 'b'. 
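+ * A typical hot bb that passes the check below (register, constant and label
+ * are made up for illustration only):
+ *   mov x0, #0       // constant return value
+ *   b   .L_return    // branch to the return bb
+ * Anything beyond one optional mov-constant plus the branch disables the
+ * fast-path transformation.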
+ */ + { + CHECK_FATAL(returnBB != nullptr, "null ptr check"); + const int32 twoInsnInReturnBB = 2; + if (returnBB->NumInsn() > twoInsnInReturnBB) { + return nullptr; + } + Insn *first = returnBB->GetFirstInsn(); + while (first->IsImmaterialInsn()) { + first = first->GetNext(); + } + if (first == returnBB->GetLastInsn()) { + if (!first->IsBranch()) { + return nullptr; + } + } else { + MOperator opCode = first->GetMachineOpcode(); + /* only allow mov constant */ + if (opCode != MOP_xmovri64 && opCode != MOP_xmovri32) { + return nullptr; + } + Insn *last = returnBB->GetLastInsn(); + if (!last->IsBranch()) { + return nullptr; + } + } + } + + /* + * Resolve any register usage issues. + * 1) Any use of parameter registes must be renamed + * 2) Any usage of callee saved register that needs saving in prolog + * must be able to move down into the cold path. + */ + + /* Find the branch's src register for backward propagation. */ + Insn *condBr = ifBB->GetLastInsn(); + auto &targetOpnd = static_cast(condBr->GetOperand(0)); + if (targetOpnd.GetRegisterType() != kRegTyInt) { + return nullptr; + } + + /* Search backward looking for dependencies for the cond branch */ + std::list insnList; /* instructions to be moved to coldbb */ + Insn *ld = nullptr; + Insn *mv = nullptr; + Insn *depMv = nullptr; + /* + * The mv is the 1st move using the parameter register leading to the branch + * The ld is the load using the parameter register indirectly for the branch + * The depMv is the move which preserves the result of the load but might + * destroy a parameter register which will be moved below the branch. + */ + if (!BackwardFindDependency(*ifBB, targetOpnd, ld, mv, depMv, insnList)) { + return nullptr; + } + if (ld == nullptr || mv == nullptr) { + return nullptr; + } + /* + * depMv can be live out, so duplicate it + * and set dest to output of ld and src R16 + */ + if (depMv != nullptr) { + CHECK_FATAL(depMv->GetMachineOpcode(), "return check"); + Insn &newMv = currCG->BuildInstruction( + depMv->GetMachineOpcode(), ld->GetOperand(0), + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(R16), + depMv->GetOperand(1).GetSize(), kRegTyInt)); + insnList.push_front(&newMv); + /* temporary put it some where */ + CHECK_FATAL(coldBB != nullptr, "null ptr check"); + static_cast(coldBB->InsertInsnBegin(newMv)); + } else { + uint32 regSize = ld->GetOperand(0).GetSize(); + Insn &newMv = currCG->BuildInstruction( + (regSize <= k32BitSize) ? MOP_xmovri32 : MOP_xmovri64, ld->GetOperand(0), + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(R16), regSize, kRegTyInt)); + insnList.push_front(&newMv); + /* temporary put it some where */ + CHECK_FATAL(coldBB != nullptr, "null ptr check"); + static_cast(coldBB->InsertInsnBegin(newMv)); + } + + ForwardPropagateAndRename(*mv, *ld, *returnBB); + + for (auto *in : insnList) { + in->GetBB()->RemoveInsn(*in); + CHECK_FATAL(coldBB != nullptr, "null ptr check"); + static_cast(coldBB->InsertInsnBegin(*in)); + } + + /* All instructions are in the right place, replace branch to ret bb to just ret. */ + CHECK_FATAL(returnBB != nullptr, "null ptr check"); + returnBB->RemoveInsn(*returnBB->GetLastInsn()); + returnBB->AppendInsn(currCG->BuildInstruction(MOP_xret)); + /* bb is now a retbb and has no succ. 
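+ * The CFG surgery below finishes the job: the bb is re-kinded to kBBReturn,
+ * it is erased from its former successor's pred list, and its own succ list
+ * is cleared, so the fast path now terminates here with the ret just added.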
*/ + returnBB->SetKind(BB::kBBReturn); + BB *tgtBB = returnBB->GetSuccs().front(); + auto predIt = std::find(tgtBB->GetPredsBegin(), tgtBB->GetPredsEnd(), returnBB); + tgtBB->ErasePreds(predIt); + returnBB->ClearSuccs(); + + return coldBB; +} + +AArch64MemOperand *AArch64GenProEpilog::SplitStpLdpOffsetForCalleeSavedWithAddInstruction(const AArch64MemOperand &mo, + uint32 bitLen, + AArch64reg baseRegNum) { + auto &aarchCGFunc = static_cast(cgFunc); + CHECK_FATAL(mo.GetAddrMode() == AArch64MemOperand::kAddrModeBOi, "mode should be kAddrModeBOi"); + AArch64OfstOperand *ofstOp = mo.GetOffsetImmediate(); + int32 offsetVal = ofstOp->GetOffsetValue(); + CHECK_FATAL(offsetVal > 0, "offsetVal should be greater than 0"); + CHECK_FATAL((static_cast(offsetVal) & 0x7) == 0, "(offsetVal & 0x7) should be equal to 0"); + /* + * Offset adjustment due to FP/SP has already been done + * in AArch64GenProEpilog::GeneratePushRegs() and AArch64GenProEpilog::GeneratePopRegs() + */ + AArch64RegOperand &br = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(baseRegNum, bitLen, kRegTyInt); + if (aarchCGFunc.GetSplitBaseOffset() == 0) { + aarchCGFunc.SetSplitBaseOffset(offsetVal); /* remember the offset; don't forget to clear it */ + ImmOperand &immAddEnd = aarchCGFunc.CreateImmOperand(offsetVal, k64BitSize, true); + RegOperand *origBaseReg = mo.GetBaseRegister(); + aarchCGFunc.SelectAdd(br, *origBaseReg, immAddEnd, PTY_i64); + } + offsetVal = offsetVal - aarchCGFunc.GetSplitBaseOffset(); + return &aarchCGFunc.CreateReplacementMemOperand(bitLen, br, offsetVal); +} + +void AArch64GenProEpilog::AppendInstructionPushPair(AArch64reg reg0, AArch64reg reg1, RegType rty, int32 offset) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair]; + Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + Operand *o2 = &aarchCGFunc.CreateStkTopOpnd(offset, kSizeOfPtr * kBitsPerByte); + + uint32 dataSize = kSizeOfPtr * kBitsPerByte; + CHECK_FATAL(offset >= 0, "offset must >= 0"); + if (offset > kStpLdpImm64UpperBound) { + o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(*static_cast(o2), dataSize, R16); + } + Insn &pushInsn = currCG->BuildInstruction(mOp, o0, o1, *o2); + std::string comment = "SAVE CALLEE REGISTER PAIR"; + pushInsn.SetComment(comment); + AppendInstructionTo(pushInsn, cgFunc); + + /* Append CFi code */ + if (!CGOptions::IsNoCalleeCFI()) { + int32 stackFrameSize = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize(); + stackFrameSize -= cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + int32 cfiOffset = stackFrameSize - offset; + BB *curBB = cgFunc.GetCurBB(); + Insn *newInsn = curBB->InsertInsnAfter(pushInsn, aarchCGFunc.CreateCfiOffsetInsn(reg0, -cfiOffset, k64BitSize)); + curBB->InsertInsnAfter(*newInsn, aarchCGFunc.CreateCfiOffsetInsn(reg1, -cfiOffset + kOffset8MemPos, k64BitSize)); + } +} + +void AArch64GenProEpilog::AppendInstructionPushSingle(AArch64reg reg, RegType rty, int32 offset) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopSingle]; + Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, kSizeOfPtr * kBitsPerByte, rty); + Operand *o1 = &aarchCGFunc.CreateStkTopOpnd(offset, kSizeOfPtr * kBitsPerByte); + + AArch64MemOperand *aarchMemO1 = static_cast(o1); + uint32 dataSize = 
kSizeOfPtr * kBitsPerByte; + if (aarchMemO1->GetMemVaryType() == kNotVary && + aarchCGFunc.IsImmediateOffsetOutOfRange(*aarchMemO1, dataSize)) { + o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*aarchMemO1, dataSize, R9); + } + + Insn &pushInsn = currCG->BuildInstruction(mOp, o0, *o1); + std::string comment = "SAVE CALLEE REGISTER"; + pushInsn.SetComment(comment); + AppendInstructionTo(pushInsn, cgFunc); + + /* Append CFI code */ + if (!CGOptions::IsNoCalleeCFI()) { + int32 stackFrameSize = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize(); + stackFrameSize -= cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + int32 cfiOffset = stackFrameSize - offset; + cgFunc.GetCurBB()->InsertInsnAfter(pushInsn, + aarchCGFunc.CreateCfiOffsetInsn(reg, -cfiOffset, k64BitSize)); + } +} + +Insn &AArch64GenProEpilog::AppendInstructionForAllocateOrDeallocateCallFrame(int64 argsToStkPassSize, + AArch64reg reg0, AArch64reg reg1, + RegType rty, bool isAllocate) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + MOperator mOp = isAllocate ? pushPopOps[kRegsPushOp][rty][kPushPopPair] : pushPopOps[kRegsPopOp][rty][kPushPopPair]; + if (argsToStkPassSize <= kStrLdrImm64UpperBound - kOffset8MemPos) { + mOp = isAllocate ? pushPopOps[kRegsPushOp][rty][kPushPopSingle] : pushPopOps[kRegsPopOp][rty][kPushPopSingle]; + AArch64RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + AArch64MemOperand *o2 = aarchCGFunc.GetMemoryPool()->New(RSP, argsToStkPassSize, + kSizeOfPtr * kBitsPerByte); + Insn &insn1 = currCG->BuildInstruction(mOp, o0, *o2); + AppendInstructionTo(insn1, cgFunc); + AArch64RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + o2 = aarchCGFunc.GetMemoryPool()->New(RSP, argsToStkPassSize + kSizeOfPtr, + kSizeOfPtr * kBitsPerByte); + Insn &insn2 = currCG->BuildInstruction(mOp, o1, *o2); + AppendInstructionTo(insn2, cgFunc); + return insn2; + } else { + AArch64RegOperand &oo = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R9, kSizeOfPtr * kBitsPerByte, kRegTyInt); + AArch64ImmOperand &io1 = aarchCGFunc.CreateImmOperand(argsToStkPassSize, k64BitSize, true); + aarchCGFunc.SelectCopyImm(oo, io1, PTY_i64); + AArch64RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + AArch64RegOperand &rsp = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, kSizeOfPtr * kBitsPerByte, kRegTyInt); + AArch64MemOperand *mo = aarchCGFunc.GetMemoryPool()->New( + AArch64MemOperand::kAddrModeBOrX, kSizeOfPtr * kBitsPerByte, rsp, oo, 0); + Insn &insn1 = currCG->BuildInstruction(isAllocate ? MOP_xstr : MOP_xldr, o0, *mo); + AppendInstructionTo(insn1, cgFunc); + AArch64ImmOperand &io2 = aarchCGFunc.CreateImmOperand(kSizeOfPtr, k64BitSize, true); + aarchCGFunc.SelectAdd(oo, oo, io2, PTY_i64); + AArch64RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + mo = aarchCGFunc.GetMemoryPool()->New(AArch64MemOperand::kAddrModeBOrX, + kSizeOfPtr * kBitsPerByte, rsp, oo, 0); + Insn &insn2 = currCG->BuildInstruction(isAllocate ? 
MOP_xstr : MOP_xldr, o1, *mo); + AppendInstructionTo(insn2, cgFunc); + return insn2; + } +} + +Insn &AArch64GenProEpilog::CreateAndAppendInstructionForAllocateCallFrame(int64 argsToStkPassSize, + AArch64reg reg0, AArch64reg reg1, + RegType rty) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair]; + Insn *allocInsn = nullptr; + if (argsToStkPassSize > kStpLdpImm64UpperBound) { + allocInsn = &AppendInstructionForAllocateOrDeallocateCallFrame(argsToStkPassSize, reg0, reg1, rty, true); + } else { + Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + Operand *o2 = aarchCGFunc.GetMemoryPool()->New(RSP, argsToStkPassSize, + kSizeOfPtr * kBitsPerByte); + allocInsn = &currCG->BuildInstruction(mOp, o0, o1, *o2); + AppendInstructionTo(*allocInsn, cgFunc); + } + if (currCG->NeedInsertInstrumentationFunction()) { + aarchCGFunc.AppendCall(*currCG->GetInstrumentationFunction()); + } else if (currCG->InstrumentWithDebugTraceCall()) { + aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction()); + } else if (currCG->InstrumentWithProfile()) { + aarchCGFunc.AppendCall(*currCG->GetProfileFunction()); + } + return *allocInsn; +} + +void AArch64GenProEpilog::AppendInstructionAllocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + if (currCG->GenerateVerboseCG()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame")); + } + + Insn *ipoint = nullptr; + /* + * stackFrameSize includes the size of args to stack-pass + * if a function has neither VLA nor alloca. + */ + int32 stackFrameSize = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize(); + int64 argsToStkPassSize = cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + /* + * ldp/stp's imm should be within -512 and 504; + * if stp's imm > 512, we fall back to the stp-sub version + */ + bool useStpSub = false; + int64 offset = 0; + int32 cfiOffset = 0; + if (!cgFunc.HasVLAOrAlloca() && argsToStkPassSize > 0) { + /* + * stack_frame_size == size of formal parameters + callee-saved (including FP/RL) + * + size of local vars + * + size of actuals + * (when passing more than 8 args, its caller's responsibility to + * allocate space for it. size of actuals represent largest such size in the function. 
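+ * A purely illustrative example (all numbers invented): with
+ * formals + callee-saved = 96, locals = 64 and actuals = 32, stack_frame_size
+ * is 192, so the code below emits "sub sp, sp, #192" and FP/LR are then
+ * stored at [sp, #32], i.e. just above the outgoing-argument area.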
+ */ + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64); + ipoint = cgFunc.GetCurBB()->GetLastInsn(); + cfiOffset = stackFrameSize; + } else { + if (stackFrameSize > kStpLdpImm64UpperBound) { + useStpSub = true; + offset = kOffset16MemPos; + stackFrameSize -= offset; + } else { + offset = stackFrameSize; + } + MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair]; + AArch64RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + AArch64RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + AArch64MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(-offset, kSizeOfPtr * kBitsPerByte); + ipoint = &currCG->BuildInstruction(mOp, o0, o1, o2); + AppendInstructionTo(*ipoint, cgFunc); + cfiOffset = offset; + if (currCG->NeedInsertInstrumentationFunction()) { + aarchCGFunc.AppendCall(*currCG->GetInstrumentationFunction()); + } else if (currCG->InstrumentWithDebugTraceCall()) { + aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction()); + } else if (currCG->InstrumentWithProfile()) { + aarchCGFunc.AppendCall(*currCG->GetProfileFunction()); + } + } + + ipoint = InsertCFIDefCfaOffset(cfiOffset, *ipoint); + + if (!cgFunc.HasVLAOrAlloca() && argsToStkPassSize > 0) { + CHECK_FATAL(!useStpSub, "Invalid assumption"); + ipoint = &CreateAndAppendInstructionForAllocateCallFrame(argsToStkPassSize, reg0, reg1, rty); + } + + if (useStpSub) { + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64); + ipoint = cgFunc.GetCurBB()->GetLastInsn(); + aarchCGFunc.SetUsedStpSubPairForCallFrameAllocation(true); + } + + CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point"); + int32 cfiOffsetSecond = 0; + if (useStpSub) { + cfiOffsetSecond = stackFrameSize; + ipoint = InsertCFIDefCfaOffset(cfiOffsetSecond, *ipoint); + } + cfiOffsetSecond = GetOffsetFromCFA(); + if (!cgFunc.HasVLAOrAlloca()) { + cfiOffsetSecond -= argsToStkPassSize; + } + BB *curBB = cgFunc.GetCurBB(); + ipoint = curBB->InsertInsnAfter(*ipoint, aarchCGFunc.CreateCfiOffsetInsn(RFP, -cfiOffsetSecond, k64BitSize)); + curBB->InsertInsnAfter(*ipoint, aarchCGFunc.CreateCfiOffsetInsn(RLR, -cfiOffsetSecond + kOffset8MemPos, k64BitSize)); +} + +void AArch64GenProEpilog::AppendInstructionAllocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + if (currCG->GenerateVerboseCG()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame for debugging")); + } + + int32 stackFrameSize = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize(); + int64 argsToStkPassSize = cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + + Insn *ipoint = nullptr; + int32 cfiOffset = 0; + + if (argsToStkPassSize > 0) { + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64); + ipoint = cgFunc.GetCurBB()->GetLastInsn(); + cfiOffset = stackFrameSize; + (void)InsertCFIDefCfaOffset(cfiOffset, *ipoint); 
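+ /*
+ * FP/LR are stored next into the slots just above the outgoing-argument
+ * area; the cfiOffset recomputed below is taken from the CFA and then
+ * reduced by argsToStkPassSize so the .cfi_offset records for FP/LR point
+ * at those slots.
+ */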
+ ipoint = &CreateAndAppendInstructionForAllocateCallFrame(argsToStkPassSize, reg0, reg1, rty); + CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point"); + cfiOffset = GetOffsetFromCFA(); + cfiOffset -= argsToStkPassSize; + } else { + bool useStpSub = false; + + if (stackFrameSize > kStpLdpImm64UpperBound) { + useStpSub = true; + AArch64RegOperand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + ImmOperand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64); + ipoint = cgFunc.GetCurBB()->GetLastInsn(); + cfiOffset = stackFrameSize; + ipoint = InsertCFIDefCfaOffset(cfiOffset, *ipoint); + } else { + MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair]; + AArch64RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + AArch64RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + AArch64MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(-stackFrameSize, kSizeOfPtr * kBitsPerByte); + ipoint = &currCG->BuildInstruction(mOp, o0, o1, o2); + AppendInstructionTo(*ipoint, cgFunc); + cfiOffset = stackFrameSize; + ipoint = InsertCFIDefCfaOffset(cfiOffset, *ipoint); + } + + if (useStpSub) { + MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair]; + AArch64RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + AArch64RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + AArch64MemOperand *o2 = aarchCGFunc.GetMemoryPool()->New(RSP, 0, kSizeOfPtr * kBitsPerByte); + ipoint = &currCG->BuildInstruction(mOp, o0, o1, *o2); + AppendInstructionTo(*ipoint, cgFunc); + } + + if (currCG->NeedInsertInstrumentationFunction()) { + aarchCGFunc.AppendCall(*currCG->GetInstrumentationFunction()); + } else if (currCG->InstrumentWithDebugTraceCall()) { + aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction()); + } else if (currCG->InstrumentWithProfile()) { + aarchCGFunc.AppendCall(*currCG->GetProfileFunction()); + } + + CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point"); + cfiOffset = GetOffsetFromCFA(); + } + BB *curBB = cgFunc.GetCurBB(); + ipoint = curBB->InsertInsnAfter(*ipoint, aarchCGFunc.CreateCfiOffsetInsn(RFP, -cfiOffset, k64BitSize)); + curBB->InsertInsnAfter(*ipoint, aarchCGFunc.CreateCfiOffsetInsn(RLR, -cfiOffset + kOffset8MemPos, k64BitSize)); +} + +/* + * From AArch64 Reference Manual + * C1.3.3 Load/Store Addressing Mode + * ... + * When stack alignment checking is enabled by system software and + * the base register is the SP, the current stack pointer must be + * initially quadword aligned, that is aligned to 16 bytes. Misalignment + * generates a Stack Alignment fault. The offset does not have to + * be a multiple of 16 bytes unless the specific Load/Store instruction + * requires this. SP cannot be used as a register offset. 
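+ * Practical consequence for the code below: callee-saved registers are saved
+ * in pairs with stp against a 16-byte aligned SP, e.g. "stp x19, x20,
+ * [sp, #96]" (registers and offset invented for illustration), and a
+ * leftover odd register falls back to a single str.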
+ */ +void AArch64GenProEpilog::GeneratePushRegs() { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + const MapleVector ®sToSave = aarchCGFunc.GetCalleeSavedRegs(); + + CHECK_FATAL(!regsToSave.empty(), "FP/LR not added to callee-saved list?"); + + AArch64reg intRegFirstHalf = kRinvalid; + AArch64reg fpRegFirstHalf = kRinvalid; + + if (currCG->GenerateVerboseCG()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("save callee-saved registers")); + } + + /* + * Even if we don't use RFP, since we push a pair of registers in one instruction + * and the stack needs be aligned on a 16-byte boundary, push RFP as well if function has a call + * Make sure this is reflected when computing callee_saved_regs.size() + */ + if (!currCG->GenerateDebugFriendlyCode()) { + AppendInstructionAllocateCallFrame(RFP, RLR, kRegTyInt); + } else { + AppendInstructionAllocateCallFrameDebug(RFP, RLR, kRegTyInt); + } + + if (currCG->GenerateVerboseCG()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("copy SP to FP")); + } + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RFP, k64BitSize, kRegTyInt); + int64 argsToStkPassSize = cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + if (argsToStkPassSize > 0) { + Operand &immOpnd = aarchCGFunc.CreateImmOperand(argsToStkPassSize, k32BitSize, true); + aarchCGFunc.SelectAdd(fpOpnd, spOpnd, immOpnd, PTY_u64); + cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true); + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiDefCfaInsn( + RFP, static_cast(cgFunc.GetMemlayout())->RealStackFrameSize() - argsToStkPassSize, + k64BitSize)); + } else { + aarchCGFunc.SelectCopy(fpOpnd, PTY_u64, spOpnd, PTY_u64); + cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true); + cgFunc.GetCurBB()->AppendInsn(currCG->BuildInstruction(cfi::OP_CFI_def_cfa_register, + aarchCGFunc.CreateCfiRegOperand(RFP, k64BitSize))); + } + + MapleVector::const_iterator it = regsToSave.begin(); + /* skip the first two registers */ + CHECK_FATAL(*it == RFP, "The first callee saved reg is expected to be RFP"); + ++it; + CHECK_FATAL(*it == RLR, "The second callee saved reg is expected to be RLR"); + ++it; + + int32 offset = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen) /* for FP/LR */) - + cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + + if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) { + /* GR/VR save areas are above the callee save area */ + AArch64MemLayout *ml = static_cast(cgFunc.GetMemlayout()); + int saveareasize = RoundUp(ml->GetSizeOfGRSaveArea(), kSizeOfPtr * k2BitSize) + + RoundUp(ml->GetSizeOfVRSaveArea(), kSizeOfPtr * k2BitSize); + offset -= saveareasize; + } + + for (; it != regsToSave.end(); ++it) { + AArch64reg reg = *it; + CHECK_FATAL(reg != RFP, "stray RFP in callee_saved_list?"); + CHECK_FATAL(reg != RLR, "stray RLR in callee_saved_list?"); + + RegType regType = AArch64isa::IsGPRegister(reg) ? kRegTyInt : kRegTyFloat; + AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) ? 
intRegFirstHalf : fpRegFirstHalf; + if (firstHalf == kRinvalid) { + /* remember it */ + firstHalf = reg; + } else { + AppendInstructionPushPair(firstHalf, reg, regType, offset); + GetNextOffsetCalleeSaved(offset); + firstHalf = kRinvalid; + } + } + + if (intRegFirstHalf != kRinvalid) { + AppendInstructionPushSingle(intRegFirstHalf, kRegTyInt, offset); + GetNextOffsetCalleeSaved(offset); + } + + if (fpRegFirstHalf != kRinvalid) { + AppendInstructionPushSingle(fpRegFirstHalf, kRegTyFloat, offset); + GetNextOffsetCalleeSaved(offset); + } + + /* + * in case we split stp/ldp instructions, + * so that we generate a load-into-base-register instruction + * for pop pairs as well. + */ + aarchCGFunc.SetSplitBaseOffset(0); +} + +void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) { + AArch64MemLayout *memlayout = static_cast(cgFunc.GetMemlayout()); + uint32 dataSizeBits = kSizeOfPtr * kBitsPerByte; + int32 offset = memlayout->GetGRSaveAreaBaseLoc(); + if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) { + offset += kSizeOfPtr; /* End of area should be aligned. Hole between VR and GR area */ + } + int32 start_regno = k8BitSize - (memlayout->GetSizeOfGRSaveArea() / kSizeOfPtr); + ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for GR Save Area"); + for (uint32 i = start_regno + static_cast(R0); i < static_cast(R8); i++) { + Operand &stackloc = aarchCGFunc.CreateStkTopOpnd(offset, dataSizeBits); + RegOperand ® = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyInt); + Insn &inst = + currCG->BuildInstruction(aarchCGFunc.PickStInsn(dataSizeBits, PTY_i64), reg, stackloc); + cgFunc.GetCurBB()->AppendInsn(inst); + offset += kSizeOfPtr; + } + offset = memlayout->GetVRSaveAreaBaseLoc(); + start_regno = k8BitSize - (memlayout->GetSizeOfVRSaveArea() / (kSizeOfPtr * k2BitSize)); + ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for VR Save Area"); + for (uint32 i = start_regno + static_cast(V0); i < static_cast(V8); i++) { + Operand &stackloc = aarchCGFunc.CreateStkTopOpnd(offset, dataSizeBits); + RegOperand ® = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyInt); + Insn &inst = + currCG->BuildInstruction(aarchCGFunc.PickStInsn(dataSizeBits, PTY_i64), reg, stackloc); + cgFunc.GetCurBB()->AppendInsn(inst); + offset += (kSizeOfPtr * k2BitSize); + } + } +} + +void AArch64GenProEpilog::AppendInstructionStackCheck(AArch64reg reg, RegType rty, int32 offset) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + /* sub x16, sp, #0x2000 */ + auto &x16Opnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, k64BitSize, rty); + auto &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, rty); + auto &imm1 = aarchCGFunc.CreateImmOperand(offset, k64BitSize, true); + aarchCGFunc.SelectSub(x16Opnd, spOpnd, imm1, PTY_u64); + + /* ldr wzr, [x16] */ + auto &wzr = AArch64RegOperand::Get32bitZeroRegister(); + auto &refX16 = aarchCGFunc.CreateMemOpnd(reg, 0, k64BitSize); + auto &soeInstr = currCG->BuildInstruction(MOP_wldr, wzr, refX16); + if (currCG->GenerateVerboseCG()) { + soeInstr.SetComment("soerror"); + } + soeInstr.SetDoNotRemove(true); + AppendInstructionTo(soeInstr, cgFunc); +} + +void AArch64GenProEpilog::GenerateProlog(BB &bb) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = 
cgFunc.GetCG(); + BB *formerCurBB = cgFunc.GetCurBB(); + aarchCGFunc.GetDummyBB()->ClearInsns(); + aarchCGFunc.GetDummyBB()->SetIsProEpilog(true); + cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB()); + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RFP, k64BitSize, kRegTyInt); + if (!cgFunc.GetHasProEpilogue()) { + return; + } + + const MapleVector ®sToSave = aarchCGFunc.GetCalleeSavedRegs(); + if (!regsToSave.empty()) { + /* + * Among other things, push the FP & LR pair. + * FP/LR are added to the callee-saved list in AllocateRegisters() + * We add them to the callee-saved list regardless of UseFP() being true/false. + * Activation Frame is allocated as part of pushing FP/LR pair + */ + GeneratePushRegs(); + } else { + int32 stackFrameSize = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize(); + if (stackFrameSize > 0) { + if (currCG->GenerateVerboseCG()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame")); + } + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64); + + int32 offset = stackFrameSize; + (void)InsertCFIDefCfaOffset(offset, *(cgFunc.GetCurBB()->GetLastInsn())); + } + if (currCG->GenerateVerboseCG()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("copy SP to FP")); + } + int64 argsToStkPassSize = cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + if (argsToStkPassSize > 0) { + Operand &immOpnd = aarchCGFunc.CreateImmOperand(argsToStkPassSize, k32BitSize, true); + aarchCGFunc.SelectAdd(fpOpnd, spOpnd, immOpnd, PTY_u64); + cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true); + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiDefCfaInsn( + RFP, static_cast(cgFunc.GetMemlayout())->RealStackFrameSize() - argsToStkPassSize, + k64BitSize)); + } else { + aarchCGFunc.SelectCopy(fpOpnd, PTY_u64, spOpnd, PTY_u64); + cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true); + cgFunc.GetCurBB()->AppendInsn( + currCG->BuildInstruction(cfi::OP_CFI_def_cfa_register, + aarchCGFunc.CreateCfiRegOperand(RFP, k64BitSize))); + } + } + GeneratePushUnnamedVarargRegs(); + if (currCG->DoCheckSOE()) { + AppendInstructionStackCheck(R16, kRegTyInt, kSoeChckOffset); + } + bb.InsertAtBeginning(*aarchCGFunc.GetDummyBB()); + cgFunc.SetCurBB(*formerCurBB); + aarchCGFunc.GetDummyBB()->SetIsProEpilog(false); +} + +void AArch64GenProEpilog::GenerateRet(BB &bb) { + CG *currCG = cgFunc.GetCG(); + bb.AppendInsn(currCG->BuildInstruction(MOP_xret)); +} + +/* + * If all the preds of exitBB made the TailcallOpt(replace blr/bl with br/b), return true, we don't create ret insn. + * Otherwise, return false, create the ret insn. 
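+ * Note that the walk below looks through empty or comment-only predecessor
+ * bbs by recursing into their own preds, so a chain of empty blocks between
+ * a tail-call bb and the exit bb does not defeat the check; varargs, local
+ * variables or VLA/alloca disable the optimization up front.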
+ */ +bool AArch64GenProEpilog::TestPredsOfRetBB(const BB &exitBB) { + AArch64MemLayout *ml = static_cast(cgFunc.GetMemlayout()); + if (cgFunc.GetMirModule().IsCModule() && + (cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) || + ml->GetSizeOfLocals() > 0 || cgFunc.HasVLAOrAlloca())) { + return false; + } + for (auto tmpBB : exitBB.GetPreds()) { + Insn *firstInsn = tmpBB->GetFirstInsn(); + if ((firstInsn == nullptr || tmpBB->IsCommentBB()) && (!tmpBB->GetPreds().empty())) { + if (!TestPredsOfRetBB(*tmpBB)) { + return false; + } + } else { + Insn *lastInsn = tmpBB->GetLastInsn(); + if (lastInsn == nullptr) { + return false; + } + MOperator insnMop = lastInsn->GetMachineOpcode(); + if (insnMop != MOP_tail_call_opt_xbl && insnMop != MOP_tail_call_opt_xblr) { + return false; + } + } + } + return true; +} + +void AArch64GenProEpilog::AppendInstructionPopSingle(AArch64reg reg, RegType rty, int32 offset) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopSingle]; + Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, kSizeOfPtr * kBitsPerByte, rty); + Operand *o1 = &aarchCGFunc.CreateStkTopOpnd(offset, kSizeOfPtr * kBitsPerByte); + AArch64MemOperand *aarchMemO1 = static_cast(o1); + uint32 dataSize = kSizeOfPtr * kBitsPerByte; + if (aarchMemO1->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*aarchMemO1, dataSize)) { + o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*aarchMemO1, dataSize, R9); + } + + Insn &popInsn = currCG->BuildInstruction(mOp, o0, *o1); + popInsn.SetComment("RESTORE"); + cgFunc.GetCurBB()->AppendInsn(popInsn); + + /* Append CFI code. */ + if (!CGOptions::IsNoCalleeCFI()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(reg, k64BitSize)); + } +} + +void AArch64GenProEpilog::AppendInstructionPopPair(AArch64reg reg0, AArch64reg reg1, RegType rty, int32 offset) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair]; + Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + Operand *o2 = &aarchCGFunc.CreateStkTopOpnd(offset, kSizeOfPtr * kBitsPerByte); + + uint32 dataSize = kSizeOfPtr * kBitsPerByte; + CHECK_FATAL(offset >= 0, "offset must >= 0"); + if (offset > kStpLdpImm64UpperBound) { + o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(*static_cast(o2), dataSize, R16); + } + Insn &popInsn = currCG->BuildInstruction(mOp, o0, o1, *o2); + popInsn.SetComment("RESTORE RESTORE"); + cgFunc.GetCurBB()->AppendInsn(popInsn); + + /* Append CFI code */ + if (!CGOptions::IsNoCalleeCFI()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(reg0, k64BitSize)); + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(reg1, k64BitSize)); + } +} + + +void AArch64GenProEpilog::AppendInstructionDeallocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair]; + Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + int32 stackFrameSize = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize(); + int64 argsToStkPassSize = 
cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + /* + * ldp/stp's imm should be within -512 and 504; + * if ldp's imm > 504, we fall back to the ldp-add version + */ + bool useLdpAdd = false; + int64 offset = 0; + + Operand *o2 = nullptr; + if (!cgFunc.HasVLAOrAlloca() && argsToStkPassSize > 0) { + o2 = aarchCGFunc.GetMemoryPool()->New(RSP, argsToStkPassSize, kSizeOfPtr * kBitsPerByte); + } else { + if (stackFrameSize > kStpLdpImm64UpperBound) { + useLdpAdd = true; + offset = kOffset16MemPos; + stackFrameSize -= offset; + } else { + offset = stackFrameSize; + } + o2 = &aarchCGFunc.CreateCallFrameOperand(offset, kSizeOfPtr * kBitsPerByte); + } + + if (useLdpAdd) { + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64); + int64 cfiOffset = GetOffsetFromCFA(); + BB *curBB = cgFunc.GetCurBB(); + curBB->InsertInsnAfter(*(curBB->GetLastInsn()), + aarchCGFunc.CreateCfiDefCfaInsn(RSP, cfiOffset - stackFrameSize, k64BitSize)); + } + + if (!cgFunc.HasVLAOrAlloca() && argsToStkPassSize > 0) { + CHECK_FATAL(!useLdpAdd, "Invalid assumption"); + if (argsToStkPassSize > kStpLdpImm64UpperBound) { + (void)AppendInstructionForAllocateOrDeallocateCallFrame(argsToStkPassSize, reg0, reg1, rty, false); + } else { + Insn &deallocInsn = currCG->BuildInstruction(mOp, o0, o1, *o2); + cgFunc.GetCurBB()->AppendInsn(deallocInsn); + } + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64); + } else { + Insn &deallocInsn = currCG->BuildInstruction(mOp, o0, o1, *o2); + cgFunc.GetCurBB()->AppendInsn(deallocInsn); + } + + /* Append CFI restore */ + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RFP, k64BitSize)); + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RLR, k64BitSize)); +} + +void AArch64GenProEpilog::AppendInstructionDeallocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair]; + Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + int32 stackFrameSize = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize(); + int64 argsToStkPassSize = cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + /* + * ldp/stp's imm should be within -512 and 504; + * if ldp's imm > 504, we fall back to the ldp-add version + */ + if (cgFunc.HasVLAOrAlloca() || argsToStkPassSize == 0) { + stackFrameSize -= argsToStkPassSize; + if (stackFrameSize > kStpLdpImm64UpperBound) { + Operand *o2; + o2 = aarchCGFunc.GetMemoryPool()->New(RSP, 0, kSizeOfPtr * kBitsPerByte); + Insn &deallocInsn = currCG->BuildInstruction(mOp, o0, o1, *o2); + cgFunc.GetCurBB()->AppendInsn(deallocInsn); + /* Append CFI restore */ + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RFP, k64BitSize)); + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RLR, k64BitSize)); + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + 
aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64); + } else { + AArch64MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(stackFrameSize, kSizeOfPtr * kBitsPerByte); + Insn &deallocInsn = currCG->BuildInstruction(mOp, o0, o1, o2); + cgFunc.GetCurBB()->AppendInsn(deallocInsn); + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RFP, k64BitSize)); + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RLR, k64BitSize)); + } + } else { + Operand *o2; + o2 = aarchCGFunc.GetMemoryPool()->New(RSP, argsToStkPassSize, kSizeOfPtr * kBitsPerByte); + if (argsToStkPassSize > kStpLdpImm64UpperBound) { + (void)AppendInstructionForAllocateOrDeallocateCallFrame(argsToStkPassSize, reg0, reg1, rty, false); + } else { + Insn &deallocInsn = currCG->BuildInstruction(mOp, o0, o1, *o2); + cgFunc.GetCurBB()->AppendInsn(deallocInsn); + } + + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RFP, k64BitSize)); + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RLR, k64BitSize)); + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64); + } +} + +void AArch64GenProEpilog::GeneratePopRegs() { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + const MapleVector ®sToRestore = aarchCGFunc.GetCalleeSavedRegs(); + + CHECK_FATAL(!regsToRestore.empty(), "FP/LR not added to callee-saved list?"); + + AArch64reg intRegFirstHalf = kRinvalid; + AArch64reg fpRegFirstHalf = kRinvalid; + + if (currCG->GenerateVerboseCG()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("restore callee-saved registers")); + } + + MapleVector::const_iterator it = regsToRestore.begin(); + /* + * Even if we don't use FP, since we push a pair of registers + * in a single instruction (i.e., stp) and the stack needs be aligned + * on a 16-byte boundary, push FP as well if the function has a call. + * Make sure this is reflected when computing calleeSavedRegs.size() + * skip the first two registers + */ + CHECK_FATAL(*it == RFP, "The first callee saved reg is expected to be RFP"); + ++it; + CHECK_FATAL(*it == RLR, "The second callee saved reg is expected to be RLR"); + ++it; + + int32 offset = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen) /* for FP/LR */) - + cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + + if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) { + /* GR/VR save areas are above the callee save area */ + AArch64MemLayout *ml = static_cast(cgFunc.GetMemlayout()); + int saveareasize = RoundUp(ml->GetSizeOfGRSaveArea(), kSizeOfPtr * k2BitSize) + + RoundUp(ml->GetSizeOfVRSaveArea(), kSizeOfPtr * k2BitSize); + offset -= saveareasize; + } + + /* + * We are using a cleared dummy block; so insertPoint cannot be ret; + * see GenerateEpilog() + */ + for (; it != regsToRestore.end(); ++it) { + AArch64reg reg = *it; + CHECK_FATAL(reg != RFP, "stray RFP in callee_saved_list?"); + CHECK_FATAL(reg != RLR, "stray RLR in callee_saved_list?"); + + RegType regType = AArch64isa::IsGPRegister(reg) ? kRegTyInt : kRegTyFloat; + AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) ? 
intRegFirstHalf : fpRegFirstHalf; + if (firstHalf == kRinvalid) { + /* remember it */ + firstHalf = reg; + } else { + /* flush the pair */ + AppendInstructionPopPair(firstHalf, reg, regType, offset); + GetNextOffsetCalleeSaved(offset); + firstHalf = kRinvalid; + } + } + + if (intRegFirstHalf != kRinvalid) { + AppendInstructionPopSingle(intRegFirstHalf, kRegTyInt, offset); + GetNextOffsetCalleeSaved(offset); + } + + if (fpRegFirstHalf != kRinvalid) { + AppendInstructionPopSingle(fpRegFirstHalf, kRegTyFloat, offset); + GetNextOffsetCalleeSaved(offset); + } + + if (!currCG->GenerateDebugFriendlyCode()) { + AppendInstructionDeallocateCallFrame(RFP, RLR, kRegTyInt); + } else { + AppendInstructionDeallocateCallFrameDebug(RFP, RLR, kRegTyInt); + } + + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiDefCfaInsn(RSP, 0, k64BitSize)); + /* + * in case we split stp/ldp instructions, + * so that we generate a load-into-base-register instruction + * for the next function, maybe? (seems not necessary, but...) + */ + aarchCGFunc.SetSplitBaseOffset(0); +} + +void AArch64GenProEpilog::AppendJump(const MIRSymbol &funcSymbol) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + Operand &targetOpnd = aarchCGFunc.CreateFuncLabelOperand(funcSymbol); + cgFunc.GetCurBB()->AppendInsn(currCG->BuildInstruction(MOP_xuncond, targetOpnd)); +} + +void AArch64GenProEpilog::GenerateEpilog(BB &bb) { + if (!cgFunc.GetHasProEpilogue()) { + if (bb.GetPreds().empty() || !TestPredsOfRetBB(bb)) { + GenerateRet(bb); + } + return; + } + + /* generate stack protected instruction */ + BB &epilogBB = GenStackGuardCheckInsn(bb); + + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + BB *formerCurBB = cgFunc.GetCurBB(); + aarchCGFunc.GetDummyBB()->ClearInsns(); + aarchCGFunc.GetDummyBB()->SetIsProEpilog(true); + cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB()); + + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RFP, k64BitSize, kRegTyInt); + + if (cgFunc.HasVLAOrAlloca()) { + aarchCGFunc.SelectCopy(spOpnd, PTY_u64, fpOpnd, PTY_u64); + } + + /* Hack: exit bb should always be reachable, since we need its existance for ".cfi_remember_state" */ + if (&epilogBB != cgFunc.GetLastBB() && epilogBB.GetNext() != nullptr) { + BB *nextBB = epilogBB.GetNext(); + do { + if (nextBB == cgFunc.GetLastBB() || !nextBB->IsEmpty()) { + break; + } + nextBB = nextBB->GetNext(); + } while (nextBB != nullptr); + if (nextBB != nullptr && !nextBB->IsEmpty()) { + cgFunc.GetCurBB()->AppendInsn(currCG->BuildInstruction(cfi::OP_CFI_remember_state)); + nextBB->InsertInsnBefore(*nextBB->GetFirstInsn(), + currCG->BuildInstruction(cfi::OP_CFI_restore_state)); + } + } + + const MapleVector ®sToSave = aarchCGFunc.GetCalleeSavedRegs(); + if (!regsToSave.empty()) { + GeneratePopRegs(); + } else { + int32 stackFrameSize = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize(); + if (stackFrameSize > 0) { + if (currCG->GenerateVerboseCG()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("pop up activation frame")); + } + + if (cgFunc.HasVLAOrAlloca()) { + stackFrameSize -= static_cast(cgFunc.GetMemlayout())->GetSegArgsStkPass().GetSize(); + } + + if (stackFrameSize > 0) { + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64); + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiDefCfaInsn(RSP, 0, k64BitSize)); + } + 
} + } + + if (currCG->InstrumentWithDebugTraceCall()) { + AppendJump(*(currCG->GetDebugTraceExitFunction())); + } + + GenerateRet(*(cgFunc.GetCurBB())); + epilogBB.AppendBBInsns(*cgFunc.GetCurBB()); + + cgFunc.SetCurBB(*formerCurBB); + aarchCGFunc.GetDummyBB()->SetIsProEpilog(false); +} + +void AArch64GenProEpilog::GenerateEpilogForCleanup(BB &bb) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + CHECK_FATAL(!cgFunc.GetExitBBsVec().empty(), "exit bb size is zero!"); + if (cgFunc.GetExitBB(0)->IsUnreachable()) { + /* if exitbb is unreachable then exitbb can not be generated */ + GenerateEpilog(bb); + } else if (aarchCGFunc.NeedCleanup()) { /* bl to the exit epilogue */ + LabelOperand &targetOpnd = aarchCGFunc.GetOrCreateLabelOperand(cgFunc.GetExitBB(0)->GetLabIdx()); + bb.AppendInsn(currCG->BuildInstruction(MOP_xuncond, targetOpnd)); + } +} + +void AArch64GenProEpilog::Run() { + CHECK_FATAL(cgFunc.GetFunction().GetBody()->GetFirst()->GetOpCode() == OP_label, + "The first statement should be a label"); + cgFunc.SetHasProEpilogue(true); + if (cgFunc.GetHasProEpilogue()) { + GenStackGuard(*(cgFunc.GetFirstBB())); + } + BB *proLog = nullptr; + if (Globals::GetInstance()->GetOptimLevel() == CGOptions::kLevel2) { + /* There are some O2 dependent assumptions made */ + proLog = IsolateFastPath(*(cgFunc.GetFirstBB())); + } + + if (cgFunc.IsExitBBsVecEmpty()) { + if (cgFunc.GetLastBB()->GetPrev()->GetFirstStmt() == cgFunc.GetCleanupLabel() && + cgFunc.GetLastBB()->GetPrev()->GetPrev()) { + cgFunc.PushBackExitBBsVec(*cgFunc.GetLastBB()->GetPrev()->GetPrev()); + } else { + cgFunc.PushBackExitBBsVec(*cgFunc.GetLastBB()->GetPrev()); + } + } + + if (proLog != nullptr) { + GenerateProlog(*proLog); + proLog->SetFastPath(true); + cgFunc.GetFirstBB()->SetFastPath(true); + } else { + GenerateProlog(*(cgFunc.GetFirstBB())); + } + + for (auto *exitBB : cgFunc.GetExitBBsVec()) { + GenerateEpilog(*exitBB); + } + + if (cgFunc.GetFunction().IsJava()) { + GenerateEpilogForCleanup(*(cgFunc.GetCleanupBB())); + } +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_reaching.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_reaching.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3807f3f4132e3d064834d77ce1d7ef4c208865af --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_reaching.cpp @@ -0,0 +1,953 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "riscv64_reaching.h" +#include "riscv64_cg.h" +#include "riscv64_operand.h" +namespace maplebe { +/* MCC_ClearLocalStackRef clear 1 stack slot, and MCC_DecRefResetPair clear 2 stack slot, + * the stack positins cleared are recorded in callInsn->clearStackOffset + */ +constexpr short kFirstClearMemIndex = 0; +constexpr short kSecondClearMemIndex = 1; + +/* insert pseudo insn for parameters definition */ +void AArch64ReachingDefinition::InitStartGen() { + BB *bb = cgFunc->GetFirstBB(); + + /* Parameters should be define first. 
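+ * For each formal that arrives in a register, a pseudo definition insn is
+ * inserted at the top of the entry bb so reaching-definition analysis sees
+ * the incoming value as defined, e.g. for a hypothetical foo(int a, double b):
+ *   MOP_pseudo_param_def_w  R0
+ *   MOP_pseudo_param_def_d  V0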
*/ + ParmLocator parmLocator(cgFunc->GetBecommon()); + PLocInfo pLoc; + for (uint32 i = 0; i < cgFunc->GetFunction().GetFormalCount(); ++i) { + MIRType *type = cgFunc->GetFunction().GetNthParamType(i); + parmLocator.LocateNextParm(*type, pLoc); + if (pLoc.reg0 == 0) { + /* If is a large frame, parameter addressing mode is based vreg:Vra. */ + continue; + } + + uint64 symSize = cgFunc->GetBecommon().GetTypeSize(type->GetTypeIndex()); + if ((cgFunc->GetMirModule().GetSrcLang() == kSrcLangC) && (symSize > k8ByteSize)) { + /* For C structure passing in one or two registers. */ + symSize = k8ByteSize; + } + RegType regType = (pLoc.reg0 < V0) ? kRegTyInt : kRegTyFloat; + uint32 srcBitSize = ((symSize < k4ByteSize) ? k4ByteSize : symSize) * kBitsPerByte; + + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + RegOperand ®Opnd = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(pLoc.reg0, srcBitSize, regType); + + MOperator mOp; + if (regType == kRegTyInt) { + if (srcBitSize <= k32BitSize) { + mOp = MOP_pseudo_param_def_w; + } else { + mOp = MOP_pseudo_param_def_x; + } + } else { + if (srcBitSize <= k32BitSize) { + mOp = MOP_pseudo_param_def_s; + } else { + mOp = MOP_pseudo_param_def_d; + } + } + + Insn &pseudoInsn = cgFunc->GetCG()->BuildInstruction(mOp, regOpnd); + bb->InsertInsnBegin(pseudoInsn); + pseudoInsns.emplace_back(&pseudoInsn); + + if (pLoc.reg1) { + regOpnd = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(pLoc.reg1, srcBitSize, regType); + Insn &pseudoInsn1 = cgFunc->GetCG()->BuildInstruction(mOp, regOpnd); + bb->InsertInsnBegin(pseudoInsn1); + pseudoInsns.emplace_back(&pseudoInsn1); + } + + { + /* + * define memory address since store param may be transfered to stp and which with the short offset range. + * we can not get the correct definition before RA. + * example: + * add x8, sp, #712 + * stp x0, x1, [x8] // store param: _this Reg40_R313644 + * stp x2, x3, [x8,#16] // store param: Reg41_R333743 Reg42_R333622 + * stp x4, x5, [x8,#32] // store param: Reg43_R401297 Reg44_R313834 + * str x7, [x8,#48] // store param: Reg46_R401297 + */ + MIRSymbol *sym = cgFunc->GetFunction().GetFormal(i); + if (!sym->IsPreg()) { + MIRSymbol *firstSym = cgFunc->GetFunction().GetFormal(i); + const AArch64SymbolAlloc *firstSymLoc = + static_cast(cgFunc->GetMemlayout()->GetSymAllocInfo(firstSym->GetStIndex())); + int32 stOffset = cgFunc->GetBaseOffset(*firstSymLoc); + MIRType *firstType = cgFunc->GetFunction().GetNthParamType(i); + uint32 firstSymSize = cgFunc->GetBecommon().GetTypeSize(firstType->GetTypeIndex()); + uint32 firstStackSize = firstSymSize < k4ByteSize ? k4ByteSize : firstSymSize; + + AArch64MemOperand *memOpnd = cgFunc->GetMemoryPool()->New(RFP, stOffset, + firstStackSize * kBitsPerByte); + MOperator mopTemp = firstStackSize <= k4ByteSize ? MOP_pseudo_param_store_w : MOP_pseudo_param_store_x; + Insn &pseudoInsnTemp = cgFunc->GetCG()->BuildInstruction(mopTemp, *memOpnd); + bb->InsertInsnBegin(pseudoInsnTemp); + pseudoInsns.emplace_back(&pseudoInsnTemp); + } + } + } + + /* if function has "bl MCC_InitializeLocalStackRef", should define corresponding memory. 
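+ * Each of the RefCount() 8-byte local-reference slots starting at
+ * GetBeginOffset() gets a MOP_pseudo_ref_init_x definition on an
+ * [RFP, offset] memory operand, so the slots cleared by that call are treated
+ * as defined at function entry.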
*/ + AArch64CGFunc *a64CGFunc = static_cast(cgFunc); + + for (uint32 i = 0; i < a64CGFunc->GetRefCount(); ++i) { + AArch64MemOperand *memOpnd = cgFunc->GetMemoryPool()->New( + RFP, a64CGFunc->GetBeginOffset() + i * k8BitSize, k64BitSize); + Insn &pseudoInsn = cgFunc->GetCG()->BuildInstruction(MOP_pseudo_ref_init_x, *memOpnd); + + bb->InsertInsnBegin(pseudoInsn); + pseudoInsns.emplace_back(&pseudoInsn); + } +} + +/* insert pseudoInsns for ehBB, R0 and R1 are defined in pseudoInsns */ +void AArch64ReachingDefinition::InitEhDefine(BB &bb) { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + + /* Insert MOP_pseudo_eh_def_x R1. */ + RegOperand ®Opnd1 = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(R1, k64BitSize, kRegTyInt); + Insn &pseudoInsn = cgFunc->GetCG()->BuildInstruction(MOP_pseudo_eh_def_x, regOpnd1); + bb.InsertInsnBegin(pseudoInsn); + pseudoInsns.emplace_back(&pseudoInsn); + + /* insert MOP_pseudo_eh_def_x R0. */ + RegOperand ®Opnd2 = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(R0, k64BitSize, kRegTyInt); + Insn &newPseudoInsn = cgFunc->GetCG()->BuildInstruction(MOP_pseudo_eh_def_x, regOpnd2); + bb.InsertInsnBegin(newPseudoInsn); + pseudoInsns.emplace_back(&newPseudoInsn); +} + +/* insert pseudoInsns for return value R0/V0 */ +void AArch64ReachingDefinition::AddRetPseudoInsn(BB &bb) { + AArch64reg regNO = static_cast(cgFunc)->GetReturnRegisterNumber(); + if (regNO == kInvalidRegNO) { + return; + } + + if (regNO == R0) { + RegOperand ®Opnd = + static_cast(cgFunc)->GetOrCreatePhysicalRegisterOperand(regNO, k64BitSize, kRegTyInt); + Insn &retInsn = cgFunc->GetCG()->BuildInstruction(MOP_pseudo_ret_int, regOpnd); + bb.AppendInsn(retInsn); + pseudoInsns.emplace_back(&retInsn); + } else { + ASSERT(regNO == V0, "CG internal error. Return value should be R0 or V0."); + RegOperand ®Opnd = + static_cast(cgFunc)->GetOrCreatePhysicalRegisterOperand(regNO, k64BitSize, kRegTyFloat); + Insn &retInsn = cgFunc->GetCG()->BuildInstruction(MOP_pseudo_ret_float, regOpnd); + bb.AppendInsn(retInsn); + pseudoInsns.emplace_back(&retInsn); + } +} + +void AArch64ReachingDefinition::AddRetPseudoInsns() { + uint32 exitBBSize = cgFunc->GetExitBBsVec().size(); + if (exitBBSize == 0) { + if (cgFunc->GetLastBB()->GetPrev()->GetFirstStmt() == cgFunc->GetCleanupLabel() && + cgFunc->GetLastBB()->GetPrev()->GetPrev()) { + AddRetPseudoInsn(*cgFunc->GetLastBB()->GetPrev()->GetPrev()); + } else { + AddRetPseudoInsn(*cgFunc->GetLastBB()->GetPrev()); + } + } else { + for (uint32 i = 0; i < exitBBSize; ++i) { + AddRetPseudoInsn(*cgFunc->GetExitBB(i)); + } + } +} + +/* all caller saved register are modified by call insn */ +void AArch64ReachingDefinition::GenAllCallerSavedRegs(BB &bb) { + for (uint32 i = R0; i <= V31; ++i) { + if (IsCallerSavedReg(i)) { + regGen[bb.GetId()]->SetBit(i); + } + } +} + +/* + * find definition for register between startInsn and endInsn. 
+ * startInsn and endInsn must be in same BB and startInsn and endInsn are included + */ +std::vector AArch64ReachingDefinition::FindRegDefBetweenInsn(uint32 regNO, Insn *startInsn, + Insn *endInsn) const { + std::vector defInsnVec; + if (startInsn == nullptr || endInsn == nullptr) { + return defInsnVec; + } + + ASSERT(startInsn->GetBB() == endInsn->GetBB(), "two insns must be in a same BB"); + ASSERT(endInsn->GetId() >= startInsn->GetId(), "two insns must be in a same BB"); + if (!regGen[startInsn->GetBB()->GetId()]->TestBit(regNO)) { + return defInsnVec; + } + + for (Insn *insn = endInsn; insn != nullptr && insn != startInsn->GetPrev(); insn = insn->GetPrev()) { + if (!insn->IsMachineInstruction()) { + continue; + } + + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + if (insn->IsCall() && IsCallerSavedReg(regNO)) { + defInsnVec.emplace_back(insn); + return defInsnVec; + } + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + bool isDef = regProp->IsDef(); + if (!isDef && !opnd.IsMemoryAccessOperand()) { + continue; + } + + if (opnd.IsList()) { + CHECK_FATAL(false, "Internal error, list operand should not be defined."); + } else if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + + if (base != nullptr) { + if (memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi && + (memOpnd.IsPostIndexed() || memOpnd.IsPreIndexed()) && + base->GetRegisterNumber() == regNO) { + defInsnVec.emplace_back(insn); + return defInsnVec; + } + } + } else if ((opnd.IsConditionCode() || opnd.IsRegister()) && + (static_cast(opnd).GetRegisterNumber() == regNO)) { + defInsnVec.emplace_back(insn); + return defInsnVec; + } + } + } + return defInsnVec; +} + +void AArch64ReachingDefinition::FindRegDefInBB(uint32 regNO, BB &bb, InsnSet &defInsnSet) const { + if (!regGen[bb.GetId()]->TestBit(regNO)) { + return; + } + + FOR_BB_INSNS(insn, (&bb)) { + if (!insn->IsMachineInstruction()) { + continue; + } + + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + if (insn->IsCall() && IsCallerSavedReg(regNO)) { + (void)defInsnSet.insert(insn); + continue; + } + + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->GetOperand(i)); + bool isDef = regProp->IsDef(); + if (!isDef && !opnd.IsMemoryAccessOperand()) { + continue; + } + + if (opnd.IsList()) { + ASSERT(false, "Internal error, list operand should not be defined."); + } else if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + + if (base != nullptr) { + if (memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi && + (memOpnd.IsPostIndexed() || memOpnd.IsPreIndexed()) && + base->GetRegisterNumber() == regNO) { + (void)defInsnSet.insert(insn); + } + } + } else if ((opnd.IsConditionCode() || opnd.IsRegister()) && + (static_cast(opnd).GetRegisterNumber() == regNO)) { + (void)defInsnSet.insert(insn); + } + } + } +} + +/* check whether call insn changed the stack status or not. 
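+ * i.e. whether the given stack offset is one of the (at most two) slots this
+ * particular call clears; those offsets were recorded on the call insn and
+ * are read back via GetClearStackOffset(kFirstClearMemIndex /
+ * kSecondClearMemIndex).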
*/ +bool AArch64ReachingDefinition::CallInsnClearDesignateStackRef(const Insn &callInsn, int64 offset) const { + return offset == callInsn.GetClearStackOffset(kFirstClearMemIndex) || + offset == callInsn.GetClearStackOffset(kSecondClearMemIndex); +} + +/* + * find definition for stack memory operand between startInsn and endInsn. + * startInsn and endInsn must be in same BB and startInsn and endInsn are included + * special case: + * MCC_ClearLocalStackRef clear designate stack position, the designate stack position is thought defined + * for example: + * add x0, x29, #24 + * bl MCC_ClearLocalStackRef + */ +std::vector AArch64ReachingDefinition::FindMemDefBetweenInsn(uint32 offset, const Insn *startInsn, + Insn *endInsn) const { + std::vector defInsnVec; + if (startInsn == nullptr || endInsn == nullptr) { + return defInsnVec; + } + + ASSERT(startInsn->GetBB() == endInsn->GetBB(), "two insns must be in a same BB"); + ASSERT(endInsn->GetId() >= startInsn->GetId(), "two insns must be in a same BB"); + if (!memGen[startInsn->GetBB()->GetId()]->TestBit(offset / kMemZoomSize)) { + return defInsnVec; + } + + for (Insn *insn = endInsn; insn != nullptr && insn != startInsn->GetPrev(); insn = insn->GetPrev()) { + if (!insn->IsMachineInstruction()) { + continue; + } + + if (insn->IsCall()) { + if (CallInsnClearDesignateStackRef(*insn, offset)) { + defInsnVec.emplace_back(insn); + return defInsnVec; + } + continue; + } + + if (!(insn->IsStore() || insn->IsPseudoInstruction())) { + continue; + } + + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + RegOperand *index = memOpnd.GetIndexRegister(); + + if (base == nullptr || !IsFrameReg(*base) || index != nullptr) { + break; + } + + ASSERT(memOpnd.GetOffsetImmediate() != nullptr, "offset must be a immediate value"); + int64 memOffset = memOpnd.GetOffsetImmediate()->GetOffsetValue(); + if ((offset == memOffset) || + (insn->IsStorePair() && offset == memOffset + GetEachMemSizeOfPair(insn->GetMachineOpcode()))) { + defInsnVec.emplace_back(insn); + return defInsnVec; + } + } + } + } + return defInsnVec; +} + +void AArch64ReachingDefinition::FindMemDefInBB(uint32 offset, BB &bb, InsnSet &defInsnSet) const { + if (!memGen[bb.GetId()]->TestBit(offset / kMemZoomSize)) { + return; + } + + FOR_BB_INSNS(insn, (&bb)) { + if (!insn->IsMachineInstruction()) { + continue; + } + + if (insn->IsCall()) { + if (CallInsnClearDesignateStackRef(*insn, offset)) { + (void)defInsnSet.insert(insn); + } + continue; + } + + if (!(insn->IsStore() || insn->IsPseudoInstruction())) { + continue; + } + + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + RegOperand *index = memOpnd.GetIndexRegister(); + + if (base == nullptr || !IsFrameReg(*base) || index != nullptr) { + break; + } + + ASSERT(memOpnd.GetOffsetImmediate() != nullptr, "offset must be a immediate value"); + int64 memOffset = memOpnd.GetOffsetImmediate()->GetOffsetValue(); + if (offset == memOffset) { + (void)defInsnSet.insert(insn); + break; + } + if (insn->IsStorePair() && offset == memOffset + GetEachMemSizeOfPair(insn->GetMachineOpcode())) { + (void)defInsnSet.insert(insn); + break; + } + } + } + } +} + +/* + * find defininition for 
register Iteratively. + * input: + * startBB: find definnition starting from startBB + * regNO: the No of register to be find + * visitedBB: record these visited BB + * defInsnSet: insn defining register is saved in this set + */ +void AArch64ReachingDefinition::DFSFindDefForRegOpnd(const BB &startBB, uint32 regNO, + std::vector &visitedBB, InsnSet &defInsnSet) const { + std::vector defInsnVec; + for (auto predBB : startBB.GetPreds()) { + if (visitedBB[predBB->GetId()] != kNotVisited) { + continue; + } + visitedBB[predBB->GetId()] = kNormalVisited; + if (regGen[predBB->GetId()]->TestBit(regNO)) { + defInsnVec.clear(); + defInsnVec = FindRegDefBetweenInsn(regNO, predBB->GetFirstInsn(), predBB->GetLastInsn()); + ASSERT(!defInsnVec.empty(), "opnd must be defined in this bb"); + defInsnSet.insert(defInsnVec.begin(), defInsnVec.end()); + } else if (regIn[predBB->GetId()]->TestBit(regNO)) { + DFSFindDefForRegOpnd(*predBB, regNO, visitedBB, defInsnSet); + } + } + + for (auto predEhBB : startBB.GetEhPreds()) { + if (visitedBB[predEhBB->GetId()] == kEHVisited) { + continue; + } + visitedBB[predEhBB->GetId()] = kEHVisited; + if (regGen[predEhBB->GetId()]->TestBit(regNO)) { + FindRegDefInBB(regNO, *predEhBB, defInsnSet); + } + + if (regIn[predEhBB->GetId()]->TestBit(regNO)) { + DFSFindDefForRegOpnd(*predEhBB, regNO, visitedBB, defInsnSet); + } + } +} + +/* + * find defininition for stack memory iteratively. + * input: + * startBB: find definnition starting from startBB + * offset: the offset of memory to be find + * visitedBB: record these visited BB + * defInsnSet: insn defining register is saved in this set + */ +void AArch64ReachingDefinition::DFSFindDefForMemOpnd(const BB &startBB, uint32 offset, + std::vector &visitedBB, InsnSet &defInsnSet) const { + std::vector defInsnVec; + for (auto predBB : startBB.GetPreds()) { + if (visitedBB[predBB->GetId()] != kNotVisited) { + continue; + } + visitedBB[predBB->GetId()] = kNormalVisited; + if (memGen[predBB->GetId()]->TestBit(offset / kMemZoomSize)) { + defInsnVec.clear(); + defInsnVec = FindMemDefBetweenInsn(offset, predBB->GetFirstInsn(), predBB->GetLastInsn()); + ASSERT(!defInsnVec.empty(), "opnd must be defined in this bb"); + defInsnSet.insert(defInsnVec.begin(), defInsnVec.end()); + } else if (memIn[predBB->GetId()]->TestBit(offset / kMemZoomSize)) { + DFSFindDefForMemOpnd(*predBB, offset, visitedBB, defInsnSet); + } + } + + for (auto predEhBB : startBB.GetEhPreds()) { + if (visitedBB[predEhBB->GetId()] == kEHVisited) { + continue; + } + visitedBB[predEhBB->GetId()] = kEHVisited; + if (memGen[predEhBB->GetId()]->TestBit(offset / kMemZoomSize)) { + FindMemDefInBB(offset, *predEhBB, defInsnSet); + } + + if (memIn[predEhBB->GetId()]->TestBit(offset / kMemZoomSize)) { + DFSFindDefForMemOpnd(*predEhBB, offset, visitedBB, defInsnSet); + } + } +} + +/* + * find defininition for register. 
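+ * The intra-BB case (a def between the start of the BB and the use) is checked first; only when no local
+ * def is found does the search walk predecessor BBs through DFSFindDefForRegOpnd.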
+ * input: + * insn: the insn in which register is used + * indexOrRegNO: the index of register in insn or the No of register to be find + * isRegNO: if indexOrRegNO is index, this argument is false, else is true + * return: + * the set of definition insns for register + */ +InsnSet AArch64ReachingDefinition::FindDefForRegOpnd(Insn &insn, uint32 indexOrRegNO, bool isRegNO) const { + uint32 regNO = indexOrRegNO; + if (!isRegNO) { + Operand &opnd = insn.GetOperand(indexOrRegNO); + auto ®Opnd = static_cast(opnd); + regNO = regOpnd.GetRegisterNumber(); + } + + std::vector defInsnVec; + if (regGen[insn.GetBB()->GetId()]->TestBit(regNO)) { + defInsnVec = FindRegDefBetweenInsn(regNO, insn.GetBB()->GetFirstInsn(), insn.GetPrev()); + } + InsnSet defInsnSet; + if (!defInsnVec.empty()) { + defInsnSet.insert(defInsnVec.begin(), defInsnVec.end()); + return defInsnSet; + } + std::vector visitedBB(kMaxBBNum, kNotVisited); + if (insn.GetBB()->IsCleanup()) { + DFSFindDefForRegOpnd(*insn.GetBB(), regNO, visitedBB, defInsnSet); + if (defInsnSet.empty()) { + FOR_ALL_BB(bb, cgFunc) { + if (bb->IsCleanup()) { + continue; + } + if (regGen[bb->GetId()]->TestBit(regNO)) { + FindRegDefInBB(regNO, *bb, defInsnSet); + } + } + } + } else { + DFSFindDefForRegOpnd(*insn.GetBB(), regNO, visitedBB, defInsnSet); + } + return defInsnSet; +} + +/* + * find insn using register between startInsn and endInsn. + * startInsn and endInsn must be in same BB and startInsn and endInsn are included + */ +bool AArch64ReachingDefinition::FindRegUseBetweenInsn(uint32 regNO, Insn *startInsn, Insn *endInsn, + InsnSet ®UseInsnSet) const { + bool findFinish = false; + if (startInsn == nullptr || endInsn == nullptr) { + return findFinish; + } + + ASSERT(startInsn->GetBB() == endInsn->GetBB(), "two insns must be in a same BB"); + ASSERT(endInsn->GetId() >= startInsn->GetId(), "two insns must be in a same BB"); + + for (Insn *insn = startInsn; insn != nullptr && insn != endInsn->GetNext(); insn = insn->GetNext()) { + if (!insn->IsMachineInstruction()) { + continue; + } + /* if insn is call and regNO is caller-saved register, then regNO will not be used later */ + if (insn->IsCall() && IsCallerSavedReg(regNO)) { + findFinish = true; + } + + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + if (opnd.IsList()) { + auto &listOpnd = static_cast(opnd); + for (auto listElem : listOpnd.GetOperands()) { + RegOperand *regOpnd = static_cast(listElem); + ASSERT(regOpnd != nullptr, "parameter operand must be RegOperand"); + if (regNO == regOpnd->GetRegisterNumber()) { + (void)regUseInsnSet.insert(insn); + } + } + continue; + } else if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *baseOpnd = memOpnd.GetBaseRegister(); + if (baseOpnd != nullptr && + (memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi) && + (memOpnd.IsPostIndexed() || memOpnd.IsPreIndexed()) && + baseOpnd->GetRegisterNumber() == regNO) { + findFinish = true; + } + } + + AArch64OpndProp *regProp = static_cast(md->operand[i]); + if (regProp->IsDef() && + (opnd.IsConditionCode() || opnd.IsRegister()) && + (static_cast(opnd).GetRegisterNumber() == regNO)) { + findFinish = true; + } + + if (!regProp->IsUse() && !opnd.IsMemoryAccessOperand()) { + continue; + } + + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + RegOperand *index = 
memOpnd.GetIndexRegister(); + if ((base != nullptr && base->GetRegisterNumber() == regNO) || + (index != nullptr && index->GetRegisterNumber() == regNO)) { + (void)regUseInsnSet.insert(insn); + } + } else if (opnd.IsConditionCode()) { + Operand &rflagOpnd = cgFunc->GetOrCreateRflag(); + RegOperand &rflagReg = static_cast(rflagOpnd); + if (rflagReg.GetRegisterNumber() == regNO) { + (void)regUseInsnSet.insert(insn); + } + } else if (opnd.IsRegister() && (static_cast(opnd).GetRegisterNumber() == regNO)) { + (void)regUseInsnSet.insert(insn); + } + } + + if (findFinish) { + break; + } + } + return findFinish; +} + +/* + * find insn using stack memory operand between startInsn and endInsn. + * startInsn and endInsn must be in same BB and startInsn and endInsn are included + */ +bool AArch64ReachingDefinition::FindMemUseBetweenInsn(uint32 offset, Insn *startInsn, const Insn *endInsn, + InsnSet &memUseInsnSet) const { + bool findFinish = false; + if (startInsn == nullptr || endInsn == nullptr) { + return findFinish; + } + + ASSERT(startInsn->GetBB() == endInsn->GetBB(), "two insns must be in a same BB"); + ASSERT(endInsn->GetId() >= startInsn->GetId(), "end ID must be greater than or equal to start ID"); + + for (Insn *insn = startInsn; insn != nullptr && insn != endInsn->GetNext(); insn = insn->GetNext()) { + if (!insn->IsMachineInstruction()) { + continue; + } + + if (insn->IsCall()) { + if (CallInsnClearDesignateStackRef(*insn, offset)) { + return true; + } + continue; + } + + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + if (!opnd.IsMemoryAccessOperand()) { + continue; + } + + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + if (base == nullptr || !IsFrameReg(*base)) { + continue; + } + + ASSERT(memOpnd.GetIndexRegister() == nullptr, "offset must not be Register for frame MemOperand"); + ASSERT(memOpnd.GetOffsetImmediate() != nullptr, "offset must be a immediate value"); + int64 memOffset = memOpnd.GetOffsetImmediate()->GetValue(); + + if (insn->IsStore() || insn->IsPseudoInstruction()) { + if (memOffset == offset) { + findFinish = true; + continue; + } + if (insn->IsStorePair() && offset == memOffset + GetEachMemSizeOfPair(insn->GetMachineOpcode())) { + findFinish = true; + continue; + } + } + + AArch64OpndProp *regProp = static_cast(md->operand[i]); + bool isUse = regProp->IsUse(); + if (!isUse) { + continue; + } + + if (offset == memOffset) { + (void)memUseInsnSet.insert(insn); + } else if (insn->IsLoadPair() && offset == memOffset + GetEachMemSizeOfPair(insn->GetMachineOpcode())) { + (void)memUseInsnSet.insert(insn); + } + } + + if (findFinish) { + break; + } + } + return findFinish; +} + +/* find all definition for stack memory operand insn.opnd[index] */ +InsnSet AArch64ReachingDefinition::FindDefForMemOpnd(Insn &insn, uint32 indexOrOffset, bool isOffset) const { + InsnSet defInsnSet; + int64 memOffSet = 0; + if (!isOffset) { + Operand &opnd = insn.GetOperand(indexOrOffset); + ASSERT(opnd.IsMemoryAccessOperand(), "opnd must be MemOperand"); + + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + RegOperand *indexReg = memOpnd.GetIndexRegister(); + + if (base == nullptr || !IsFrameReg(*base) || indexReg) { + return defInsnSet; + } + ASSERT(memOpnd.GetOffsetImmediate() != nullptr, "offset must be a immediate value"); + memOffSet = 
memOpnd.GetOffsetImmediate()->GetOffsetValue(); + } else { + memOffSet = indexOrOffset; + } + std::vector defInsnVec; + if (memGen[insn.GetBB()->GetId()]->TestBit(memOffSet / kMemZoomSize)) { + defInsnVec = FindMemDefBetweenInsn(memOffSet, insn.GetBB()->GetFirstInsn(), insn.GetPrev()); + } + + if (!defInsnVec.empty()) { + defInsnSet.insert(defInsnVec.begin(), defInsnVec.end()); + return defInsnSet; + } + std::vector visitedBB(kMaxBBNum, kNotVisited); + if (insn.GetBB()->IsCleanup()) { + DFSFindDefForMemOpnd(*insn.GetBB(), memOffSet, visitedBB, defInsnSet); + if (defInsnSet.empty()) { + FOR_ALL_BB(bb, cgFunc) { + if (bb->IsCleanup()) { + continue; + } + + if (memGen[bb->GetId()]->TestBit(memOffSet / kMemZoomSize)) { + FindMemDefInBB(memOffSet, *bb, defInsnSet); + } + } + } + } else { + DFSFindDefForMemOpnd(*insn.GetBB(), memOffSet, visitedBB, defInsnSet); + } + + return defInsnSet; +} + +/* + * find all insn using stack memory operand insn.opnd[index] + * secondMem is used to represent the second stack memory opernad in store pair insn + */ +InsnSet AArch64ReachingDefinition::FindUseForMemOpnd(Insn &insn, uint8 index, bool secondMem) const { + Operand &opnd = insn.GetOperand(index); + ASSERT(opnd.IsMemoryAccessOperand(), "opnd must be MemOperand"); + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + + InsnSet useInsnSet; + if (base == nullptr || !IsFrameReg(*base)) { + return useInsnSet; + } + + ASSERT(memOpnd.GetIndexRegister() == nullptr, "IndexRegister no nullptr"); + ASSERT(memOpnd.GetOffsetImmediate() != nullptr, "offset must be a immediate value"); + int64 memOffSet = memOpnd.GetOffsetImmediate()->GetOffsetValue(); + if (secondMem) { + ASSERT(insn.IsStorePair(), "second MemOperand can only be defined in stp insn"); + memOffSet += GetEachMemSizeOfPair(insn.GetMachineOpcode()); + } + /* memOperand may be redefined in current BB */ + bool findFinish = FindMemUseBetweenInsn(memOffSet, insn.GetNext(), insn.GetBB()->GetLastInsn(), useInsnSet); + std::vector visitedBB(kMaxBBNum, false); + if (findFinish || !memOut[insn.GetBB()->GetId()]->TestBit(memOffSet / kMemZoomSize)) { + if (insn.GetBB()->GetEhSuccs().size() != 0) { + DFSFindUseForMemOpnd(*insn.GetBB(), memOffSet, visitedBB, useInsnSet, true); + } + } else { + DFSFindUseForMemOpnd(*insn.GetBB(), memOffSet, visitedBB, useInsnSet, false); + } + if (!insn.GetBB()->IsCleanup() && firstCleanUpBB) { + if (memUse[firstCleanUpBB->GetId()]->TestBit(memOffSet / kMemZoomSize)) { + findFinish = FindMemUseBetweenInsn(memOffSet, firstCleanUpBB->GetFirstInsn(), + firstCleanUpBB->GetLastInsn(), useInsnSet); + if (findFinish || !memOut[firstCleanUpBB->GetId()]->TestBit(memOffSet / kMemZoomSize)) { + return useInsnSet; + } + } + DFSFindUseForMemOpnd(*firstCleanUpBB, memOffSet, visitedBB, useInsnSet, false); + } + return useInsnSet; +} + +/* + * initialize bb.gen and bb.use + * if it is not computed in first time, bb.gen and bb.use must be cleared firstly + */ +void AArch64ReachingDefinition::InitGenUse(BB &bb, bool firstTime) { + if (!firstTime && (mode & kRDRegAnalysis)) { + regGen[bb.GetId()]->ResetAllBit(); + regUse[bb.GetId()]->ResetAllBit(); + } + if (!firstTime && (mode & kRDMemAnalysis)) { + memGen[bb.GetId()]->ResetAllBit(); + memUse[bb.GetId()]->ResetAllBit(); + } + + if (bb.IsEmpty()) { + return; + } + + FOR_BB_INSNS(insn, (&bb)) { + if (!insn->IsMachineInstruction()) { + continue; + } + if (insn->IsCall()) { + GenAllCallerSavedRegs(bb); + InitMemInfoForClearStackCall(*insn); + } + const AArch64MD *md = 
&AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + if (opnd.IsList() && (mode & kRDRegAnalysis)) { + ASSERT(regProp->IsUse(), "ListOperand is used in insn"); + InitInfoForListOpnd(bb, opnd); + } else if (opnd.IsMemoryAccessOperand()) { + InitInfoForMemOperand(*insn, opnd, regProp->IsDef()); + } else if (opnd.IsConditionCode() && (mode & kRDRegAnalysis)) { + ASSERT(regProp->IsUse(), "condition code is used in insn"); + InitInfoForConditionCode(bb); + } else if (opnd.IsRegister() && (mode & kRDRegAnalysis)) { + InitInfoForRegOpnd(bb, opnd, regProp->IsDef()); + } + } + } +} + +void AArch64ReachingDefinition::InitMemInfoForClearStackCall(Insn &callInsn) { + if (!(mode & kRDMemAnalysis) || !callInsn.IsClearDesignateStackCall()) { + return; + } + int64 firstOffset = callInsn.GetClearStackOffset(kFirstClearMemIndex); + constexpr int64 defaultValOfClearMemOffset = -1; + if (firstOffset != defaultValOfClearMemOffset) { + memGen[callInsn.GetBB()->GetId()]->SetBit(firstOffset / kMemZoomSize); + } + int64 secondOffset = callInsn.GetClearStackOffset(kSecondClearMemIndex); + if (secondOffset != defaultValOfClearMemOffset) { + memGen[callInsn.GetBB()->GetId()]->SetBit(secondOffset / kMemZoomSize); + } +} + +void AArch64ReachingDefinition::InitInfoForMemOperand(Insn &insn, Operand &opnd, bool isDef) { + ASSERT(opnd.IsMemoryAccessOperand(), "opnd must be MemOperand"); + AArch64MemOperand &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + RegOperand *index = memOpnd.GetIndexRegister(); + + if (base == nullptr) { + return; + } + if ((mode & kRDMemAnalysis) && IsFrameReg(*base)) { + CHECK_FATAL(index == nullptr, "Existing [x29 + index] Memory Address"); + ASSERT(memOpnd.GetOffsetImmediate(), "offset must be a immediate value"); + int32 offsetVal = memOpnd.GetOffsetImmediate()->GetOffsetValue(); + if ((offsetVal % kMemZoomSize) != 0) { + SetAnalysisMode(kRDRegAnalysis); + } + + if (!isDef) { + memUse[insn.GetBB()->GetId()]->SetBit(offsetVal / kMemZoomSize); + if (insn.IsLoadPair()) { + int64 nextMemOffset = offsetVal + GetEachMemSizeOfPair(insn.GetMachineOpcode()); + memUse[insn.GetBB()->GetId()]->SetBit(nextMemOffset / kMemZoomSize); + } + } else if (isDef) { + memGen[insn.GetBB()->GetId()]->SetBit(offsetVal / kMemZoomSize); + if (insn.IsStorePair()) { + int64 nextMemOffset = offsetVal + GetEachMemSizeOfPair(insn.GetMachineOpcode()); + memGen[insn.GetBB()->GetId()]->SetBit(nextMemOffset / kMemZoomSize); + } + } + } + + if (mode & kRDRegAnalysis) { + regUse[insn.GetBB()->GetId()]->SetBit(base->GetRegisterNumber()); + if (index != nullptr) { + regUse[insn.GetBB()->GetId()]->SetBit(index->GetRegisterNumber()); + } + if (memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi && + (memOpnd.IsPostIndexed() || memOpnd.IsPreIndexed())) { + /* Base operand has changed. 
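+ * Pre/post-indexed addressing writes the updated address back to the base register,
+ * so the base is recorded as defined here in addition to being used.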
*/ + regGen[insn.GetBB()->GetId()]->SetBit(base->GetRegisterNumber()); + } + } +} + +void AArch64ReachingDefinition::InitInfoForListOpnd(const BB &bb, Operand &opnd) { + ListOperand *listOpnd = static_cast(&opnd); + for (auto listElem : listOpnd->GetOperands()) { + RegOperand *regOpnd = static_cast(listElem); + ASSERT(regOpnd != nullptr, "used Operand in call insn must be Register"); + regUse[bb.GetId()]->SetBit(regOpnd->GetRegisterNumber()); + } +} + +void AArch64ReachingDefinition::InitInfoForConditionCode(const BB &bb) { + Operand &rflagOpnd = cgFunc->GetOrCreateRflag(); + RegOperand &rflagReg = static_cast(rflagOpnd); + regUse[bb.GetId()]->SetBit(rflagReg.GetRegisterNumber()); +} + +void AArch64ReachingDefinition::InitInfoForRegOpnd(const BB &bb, Operand &opnd, bool isDef) { + RegOperand *regOpnd = static_cast(&opnd); + if (!isDef) { + regUse[bb.GetId()]->SetBit(regOpnd->GetRegisterNumber()); + } else { + regGen[bb.GetId()]->SetBit(regOpnd->GetRegisterNumber()); + } +} + +int32 AArch64ReachingDefinition::GetStackSize() const { + const int sizeofFplr = kDivide2 * kIntregBytelen; + return static_cast(cgFunc->GetMemlayout())->RealStackFrameSize() + sizeofFplr; +} + +bool AArch64ReachingDefinition::IsCallerSavedReg(uint32 regNO) const { + return (R0 <= regNO && regNO <= R18) || (V0 <= regNO && regNO <= V7) || (V16 <= regNO && regNO <= V31); +} + +int64 AArch64ReachingDefinition::GetEachMemSizeOfPair(MOperator opCode) const { + switch (opCode) { + case MOP_wstp: + case MOP_sstp: + case MOP_wstlxp: + case MOP_wldp: + case MOP_xldpsw: + case MOP_sldp: + case MOP_wldaxp: + return kWordByteNum; + case MOP_xstp: + case MOP_dstp: + case MOP_xstlxp: + case MOP_xldp: + case MOP_dldp: + case MOP_xldaxp: + return kDoubleWordByteNum; + default: + return 0; + } +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_reg_alloc.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_reg_alloc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..666d3ce8e70dd5fc7ba9a18b47362d4b58190397 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_reg_alloc.cpp @@ -0,0 +1,691 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "riscv64_reg_alloc.h" +#include "riscv64_lsra.h" +#include "riscv64_color_ra.h" +#include "riscv64_cg.h" +#include "riscv64_live.h" +#include "mir_lower.h" +#include "securec.h" + +namespace maplebe { +/* + * NB. As an optimization we can use X8 as a scratch (temporary) + * register if the return value is not returned through memory. 
+ */ +constexpr uint32 kCondBrNum = 2; +constexpr uint32 kSwitchCaseNum = 5; + +Operand *AArch64RegAllocator::HandleRegOpnd(Operand &opnd) { + ASSERT(opnd.IsRegister(), "Operand should be register operand"); + auto ®Opnd = static_cast(opnd); + if (regOpnd.IsOfCC()) { + return &opnd; + } + if (!regOpnd.IsVirtualRegister()) { + availRegSet[regOpnd.GetRegisterNumber()] = false; + (void)liveReg.insert(regOpnd.GetRegisterNumber()); + return static_cast(®Opnd); + } + auto regMapIt = regMap.find(regOpnd.GetRegisterNumber()); + auto *a64CGFunc = static_cast(cgFunc); + if (regMapIt != regMap.end()) { /* already allocated this register */ + ASSERT(AArch64isa::IsPhysicalRegister(regMapIt->second), "must be a physical register"); + AArch64reg newRegNO = regMapIt->second; + availRegSet[newRegNO] = false; /* make sure the real register can not be allocated and live */ + (void)liveReg.insert(newRegNO); + (void)allocatedSet.insert(&opnd); + return &a64CGFunc->GetOrCreatePhysicalRegisterOperand(newRegNO, regOpnd.GetSize(), regOpnd.GetRegisterType()); + } + if (AllocatePhysicalRegister(regOpnd)) { + (void)allocatedSet.insert(&opnd); + auto regMapItSecond = regMap.find(regOpnd.GetRegisterNumber()); + ASSERT(regMapItSecond != regMap.end(), " ERROR: can not find register number in regmap "); + return &a64CGFunc->GetOrCreatePhysicalRegisterOperand(regMapItSecond->second, regOpnd.GetSize(), + regOpnd.GetRegisterType()); + } + + /* use 0 register as spill register */ + regno_t regNO = 0; + return &a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(regNO), regOpnd.GetSize(), + regOpnd.GetRegisterType()); +} + +Operand *AArch64RegAllocator::HandleMemOpnd(Operand &opnd) { + ASSERT(opnd.IsMemoryAccessOperand(), "Operand should be memory access operand"); + auto *memOpnd = static_cast(&opnd); + Operand *res = nullptr; + switch (memOpnd->GetAddrMode()) { + case AArch64MemOperand::kAddrModeBOi: + res = AllocSrcOpnd(*memOpnd->GetBaseRegister()); + ASSERT(res->IsRegister(), "must be register"); + ASSERT(!static_cast(res)->IsVirtualRegister(), "not a virtual register"); + memOpnd->SetBaseRegister(static_cast(*res)); + break; + case AArch64MemOperand::kAddrModeBOrX: + res = AllocSrcOpnd(*memOpnd->GetBaseRegister()); + ASSERT(res->IsRegister(), "must be register"); + ASSERT(!static_cast(res)->IsVirtualRegister(), "not a virtual register"); + memOpnd->SetBaseRegister(static_cast(*res)); + res = AllocSrcOpnd(*memOpnd->GetOffsetRegister()); + ASSERT(res->IsRegister(), "must be register"); + ASSERT(!static_cast(res)->IsVirtualRegister(), "not a virtual register"); + memOpnd->SetOffsetRegister(static_cast(*res)); + break; + case AArch64MemOperand::kAddrModeLiteral: + break; + case AArch64MemOperand::kAddrModeLo12Li: + res = AllocSrcOpnd(*memOpnd->GetBaseRegister()); + ASSERT(res->IsRegister(), "must be register"); + ASSERT(!static_cast(res)->IsVirtualRegister(), "not a virtual register"); + memOpnd->SetBaseRegister(static_cast(*res)); + break; + default: + ASSERT(false, "ERROR: should not run here"); + break; + } + (void)allocatedSet.insert(&opnd); + return memOpnd; +} + +Operand *AArch64RegAllocator::AllocSrcOpnd(Operand &opnd, OpndProp *prop) { + auto *opndProp = static_cast(prop); + if (opndProp != nullptr && (opndProp->GetRegProp().GetRegType() == kRegTyCc || + opndProp->GetRegProp().GetRegType() == kRegTyVary)) { + return &opnd; + } + if (opnd.IsRegister()) { + return HandleRegOpnd(opnd); + } else if (opnd.IsMemoryAccessOperand()) { + return HandleMemOpnd(opnd); + } + ASSERT(false, "NYI"); + return nullptr; +} + 
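+/*
+ * Illustrative sketch only (simplified from DefaultO0RegAllocator::AllocateRegisters below, not called
+ * anywhere): for one insn the allocator consults the operand properties in the AArch64MD entry and
+ * rewrites defs with AllocDestOpnd and uses with AllocSrcOpnd:
+ *
+ *   const AArch64MD *md = &AArch64CG::kMd[insn->GetMachineOpcode()];
+ *   for (uint32 i = 0; i < insn->GetOperandSize(); ++i) {
+ *     if (static_cast<AArch64OpndProp*>(md->operand[i])->IsRegDef()) {
+ *       insn->SetOperand(i, *AllocDestOpnd(insn->GetOperand(i), *insn));
+ *     }
+ *   }
+ *   for (uint32 i = 0; i < insn->GetOperandSize(); ++i) {
+ *     if (static_cast<AArch64OpndProp*>(md->operand[i])->IsRegUse()) {
+ *       insn->SetOperand(i, *AllocSrcOpnd(insn->GetOperand(i), md->operand[i]));
+ *     }
+ *   }
+ */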
+Operand *AArch64RegAllocator::AllocDestOpnd(Operand &opnd, const Insn &insn) { + if (!opnd.IsRegister()) { + ASSERT(false, "result operand must be of type register"); + return nullptr; + } + auto ®Opnd = static_cast(opnd); + if (!regOpnd.IsVirtualRegister()) { + auto reg = static_cast(regOpnd.GetRegisterNumber()); + availRegSet[reg] = true; + uint32 id = GetRegLivenessId(®Opnd); + if (id && (id <= insn.GetId())) { + ReleaseReg(reg); + } + return &opnd; + } + + auto *a64CGFunc = static_cast(cgFunc); + auto regMapIt = regMap.find(regOpnd.GetRegisterNumber()); + if (regMapIt != regMap.end()) { + AArch64reg reg = regMapIt->second; + if (!insn.IsCondDef()) { + uint32 id = GetRegLivenessId(®Opnd); + if (id && (id <= insn.GetId())) { + ReleaseReg(reg); + } + } + } else { + /* AllocatePhysicalRegister insert a mapping from vreg no to phy reg no into regMap */ + if (AllocatePhysicalRegister(regOpnd)) { + regMapIt = regMap.find(regOpnd.GetRegisterNumber()); + if (!insn.IsCondDef()) { + uint32 id = GetRegLivenessId(®Opnd); + if (id && (id <= insn.GetId())) { + ReleaseReg(regMapIt->second); + } + } + } else { + /* For register spill. use 0 register as spill register */ + regno_t regNO = 0; + return &a64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(regNO), regOpnd.GetSize(), + regOpnd.GetRegisterType()); + } + } + (void)allocatedSet.insert(&opnd); + return &a64CGFunc->GetOrCreatePhysicalRegisterOperand(regMapIt->second, regOpnd.GetSize(), regOpnd.GetRegisterType()); +} + +void AArch64RegAllocator::PreAllocate() { + FOR_ALL_BB(bb, cgFunc) { + if (bb->IsEmpty()) { + continue; + } + FOR_BB_INSNS_SAFE(insn, bb, nextInsn) { + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + if (!md->UseSpecReg()) { + continue; + } + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + auto *opndProp = static_cast(md->operand[i]); + if (!opndProp->IsPhysicalRegister()) { + continue; + } + auto *a64CGFunc = static_cast(cgFunc); + auto ®Opnd = static_cast(opnd); + AArch64RegOperand &phyReg = a64CGFunc->GetOrCreatePhysicalRegisterOperand( + opndProp->GetRegProp().GetPhysicalReg(), opnd.GetSize(), kRegTyInt); + if (opndProp->IsRegDef()) { + Insn &newInsn = a64CGFunc->GetCG()->BuildInstruction(a64CGFunc->PickMovInsn(regOpnd, phyReg), + regOpnd, phyReg); + bb->InsertInsnAfter(*insn, newInsn); + } else { + Insn &newInsn = a64CGFunc->GetCG()->BuildInstruction(a64CGFunc->PickMovInsn(phyReg, regOpnd), + phyReg, regOpnd); + bb->InsertInsnBefore(*insn, newInsn); + } + insn->SetOperand(i, phyReg); + } + } + } +} + +void AArch64RegAllocator::AllocHandleCallee(Insn &insn, const AArch64MD &md) { + auto *a64CGFunc = static_cast(cgFunc); + Operand &opnd1 = insn.GetOperand(1); + if (opnd1.IsList()) { + auto &srcOpnds = static_cast(insn.GetOperand(1)); + auto *srcOpndsNew = + a64CGFunc->GetMemoryPool()->New(*a64CGFunc->GetFuncScopeAllocator()); + for (auto *regOpnd : srcOpnds.GetOperands()) { + ASSERT(!regOpnd->IsVirtualRegister(), "not be a virtual register"); + auto physicalReg = static_cast(regOpnd->GetRegisterNumber()); + availRegSet[physicalReg] = false; + (void)liveReg.insert(physicalReg); + srcOpndsNew->PushOpnd( + a64CGFunc->GetOrCreatePhysicalRegisterOperand(physicalReg, regOpnd->GetSize(), regOpnd->GetRegisterType())); + } + insn.SetOperand(1, *srcOpndsNew); + } + + Operand &opnd = insn.GetOperand(0); + if (opnd.IsRegister() && static_cast(md.operand[0])->IsRegUse()) { + if (allocatedSet.find(&opnd) != allocatedSet.end()) { + 
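+      /* this register operand was already allocated earlier; reuse the physical register recorded in regMap */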
auto ®Opnd = static_cast(opnd); + AArch64reg physicalReg = regMap[regOpnd.GetRegisterNumber()]; + Operand &phyRegOpnd = a64CGFunc->GetOrCreatePhysicalRegisterOperand(physicalReg, regOpnd.GetSize(), + regOpnd.GetRegisterType()); + insn.SetOperand(0, phyRegOpnd); + } else { + Operand *srcOpnd = AllocSrcOpnd(opnd, md.operand[0]); + CHECK_NULL_FATAL(srcOpnd); + insn.SetOperand(0, *srcOpnd); + } + } +} + +void AArch64RegAllocator::GetPhysicalRegisterBank(RegType regTy, uint8 &begin, uint8 &end) { + switch (regTy) { + case kRegTyVary: + case kRegTyCc: + begin = kRinvalid; + end = kRinvalid; + break; + case kRegTyInt: + begin = R0; + end = R28; + break; + case kRegTyFloat: + begin = V0; + end = V31; + break; + default: + ASSERT(false, "NYI"); + break; + } +} + +void AArch64RegAllocator::InitAvailReg() { + errno_t eNum = memset_s(availRegSet, kAllRegNum, true, sizeof(availRegSet)); + if (eNum) { + CHECK_FATAL(false, "memset_s failed"); + } + availRegSet[R29] = false; /* FP */ + availRegSet[RLR] = false; + availRegSet[RSP] = false; + availRegSet[RZR] = false; + + /* + * when yieldpoint is enabled, + * the dedicated register is not available. + */ + if (cgFunc->GetCG()->GenYieldPoint()) { + availRegSet[RYP] = false; + } +} + +bool AArch64RegAllocator::IsYieldPointReg(AArch64reg regNO) const { + if (cgFunc->GetCG()->GenYieldPoint()) { + return (regNO == RYP); + } + return false; +} + +/* these registers can not be allocated */ +bool AArch64RegAllocator::IsSpecialReg(AArch64reg reg) const { + if ((reg == RLR) || (reg == RSP)) { + return true; + } + + /* when yieldpoint is enabled, the dedicated register can not be allocated. */ + if (cgFunc->GetCG()->GenYieldPoint() && (reg == RYP)) { + return true; + } + + const auto *aarch64CGFunc = static_cast(cgFunc); + for (const auto &it : aarch64CGFunc->GetFormalRegList()) { + if (it == reg) { + return true; + } + } + return false; +} + +/* Those registers can not be overwrite. */ +bool AArch64RegAllocator::IsUntouchableReg(uint32 regNO) const { + if ((regNO == RSP) || (regNO == RFP)) { + return true; + } + + /* when yieldpoint is enabled, the RYP(x19) can not be used. */ + if (cgFunc->GetCG()->GenYieldPoint() && (regNO == RYP)) { + return true; + } + + return false; +} + +void AArch64RegAllocator::ReleaseReg(RegOperand ®Opnd) { + ReleaseReg(regMap[regOpnd.GetRegisterNumber()]); +} + +void AArch64RegAllocator::ReleaseReg(AArch64reg reg) { + ASSERT(reg < kMaxRegNum, "can't release virtual register"); + liveReg.erase(reg); + if (!IsSpecialReg(static_cast(reg))) { + availRegSet[reg] = true; + } +} + +/* trying to allocate a physical register to opnd. return true if success */ +bool AArch64RegAllocator::AllocatePhysicalRegister(RegOperand &opnd) { + RegType regType = opnd.GetRegisterType(); + uint8 regStart = 0; + uint8 regEnd = 0; + GetPhysicalRegisterBank(regType, regStart, regEnd); + + for (uint8 reg = regStart; reg <= regEnd; ++reg) { + if (!availRegSet[reg]) { + continue; + } + + regMap[opnd.GetRegisterNumber()] = AArch64reg(reg); + availRegSet[reg] = false; + (void)liveReg.insert(reg); /* this register is live now */ + return true; + } + return false; +} + +/* If opnd is a callee saved register, save it in the prolog and restore it in the epilog */ +void AArch64RegAllocator::SaveCalleeSavedReg(RegOperand ®Opnd) { + regno_t regNO = regOpnd.GetRegisterNumber(); + auto a64Reg = static_cast(regOpnd.IsVirtualRegister() ? regMap[regNO] : regNO); + /* when yieldpoint is enabled, skip the reserved register for yieldpoint. 
*/ + if (cgFunc->GetCG()->GenYieldPoint() && (a64Reg == RYP)) { + return; + } + + if (AArch64Abi::IsCalleeSavedReg(a64Reg)) { + static_cast(cgFunc)->AddtoCalleeSaved(a64Reg); + } +} + +bool AArch64RegAllocator::AllPredBBVisited(BB &bb) const { + bool isAllPredsVisited = true; + for (const auto *predBB : bb.GetPreds()) { + /* See if pred bb is a loop back edge */ + bool isBackEdge = false; + for (const auto *loopBB : predBB->GetLoopSuccs()) { + if (loopBB == &bb) { + isBackEdge = true; + break; + } + } + if (!isBackEdge && !visitedBBs[predBB->GetId()]) { + isAllPredsVisited = false; + break; + } + } + for (const auto *predEhBB : bb.GetEhPreds()) { + bool isBackEdge = false; + for (const auto *loopBB : predEhBB->GetLoopSuccs()) { + if (loopBB == &bb) { + isBackEdge = true; + break; + } + } + if (!isBackEdge && !visitedBBs[predEhBB->GetId()]) { + isAllPredsVisited = false; + break; + } + } + return isAllPredsVisited; +} + +/* + * During live interval construction, bb has only one predecessor and/or one + * successor are stright line bb. It can be considered to be a single large bb + * for the purpose of finding live interval. This is to prevent extending live + * interval of registers unnecessarily when interleaving bb from other paths. + */ +BB *AArch64RegAllocator::MarkStraightLineBBInBFS(BB *bb) { + while (true) { + if ((bb->GetSuccs().size() != 1) || !bb->GetEhSuccs().empty()) { + break; + } + BB *sbb = bb->GetSuccs().front(); + if (visitedBBs[sbb->GetId()]) { + break; + } + if ((sbb->GetPreds().size() != 1) || !sbb->GetEhPreds().empty()) { + break; + } + sortedBBs.push_back(sbb); + visitedBBs[sbb->GetId()] = true; + bb = sbb; + } + return bb; +} + +BB *AArch64RegAllocator::SearchForStraightLineBBs(BB &bb) { + if ((bb.GetSuccs().size() != kCondBrNum) || bb.GetEhSuccs().empty()) { + return &bb; + } + BB *sbb1 = bb.GetSuccs().front(); + BB *sbb2 = bb.GetSuccs().back(); + size_t predSz1 = sbb1->GetPreds().size(); + size_t predSz2 = sbb2->GetPreds().size(); + BB *candidateBB = nullptr; + if ((predSz1 == 1) && (predSz2 > kSwitchCaseNum)) { + candidateBB = sbb1; + } else if ((predSz2 == 1) && (predSz1 > kSwitchCaseNum)) { + candidateBB = sbb2; + } else { + return &bb; + } + ASSERT(candidateBB->GetId() < visitedBBs.size(), "index out of range in RA::SearchForStraightLineBBs"); + if (visitedBBs[candidateBB->GetId()]) { + return &bb; + } + if (!candidateBB->GetEhPreds().empty()) { + return &bb; + } + if (candidateBB->GetSuccs().size() != 1) { + return &bb; + } + + sortedBBs.push_back(candidateBB); + visitedBBs[candidateBB->GetId()] = true; + return MarkStraightLineBBInBFS(candidateBB); +} + +void AArch64RegAllocator::BFS(BB &curBB) { + std::queue workList; + workList.push(&curBB); + ASSERT(curBB.GetId() < cgFunc->NumBBs(), "RA::BFS visitedBBs overflow"); + ASSERT(curBB.GetId() < visitedBBs.size(), "index out of range in RA::BFS"); + visitedBBs[curBB.GetId()] = true; + do { + BB *bb = workList.front(); + sortedBBs.push_back(bb); + ASSERT(bb->GetId() < cgFunc->NumBBs(), "RA::BFS visitedBBs overflow"); + visitedBBs[bb->GetId()] = true; + workList.pop(); + /* Look for straight line bb */ + bb = MarkStraightLineBBInBFS(bb); + /* Look for an 'if' followed by some straight-line bb */ + bb = SearchForStraightLineBBs(*bb); + for (auto *ibb : bb->GetSuccs()) { + /* See if there are unvisited predecessor */ + if (visitedBBs[ibb->GetId()]) { + continue; + } + if (AllPredBBVisited(*ibb)) { + workList.push(ibb); + ASSERT(ibb->GetId() < cgFunc->NumBBs(), "GCRA::BFS visitedBBs overflow"); + 
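+        /* mark the successor visited immediately so it is not enqueued again from another predecessor */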
visitedBBs[ibb->GetId()] = true; + } + } + } while (!workList.empty()); +} + +void AArch64RegAllocator::ComputeBlockOrder() { + visitedBBs.clear(); + sortedBBs.clear(); + visitedBBs.resize(cgFunc->NumBBs()); + for (uint32 i = 0; i < cgFunc->NumBBs(); ++i) { + visitedBBs[i] = false; + } + BB *cleanupBB = nullptr; + FOR_ALL_BB(bb, cgFunc) { + bb->SetInternalFlag1(0); + if (bb->GetFirstStmt() == cgFunc->GetCleanupLabel()) { + cleanupBB = bb; + } + } + for (BB *bb = cleanupBB; bb != nullptr; bb = bb->GetNext()) { + bb->SetInternalFlag1(1); + } + + bool changed; + size_t sortedCnt = 0; + bool done = false; + do { + changed = false; + FOR_ALL_BB(bb, cgFunc) { + if (bb->GetInternalFlag1() == 1) { + continue; + } + if (visitedBBs[bb->GetId()]) { + continue; + } + changed = true; + if (AllPredBBVisited(*bb)) { + BFS(*bb); + } + } + /* Make sure there is no infinite loop. */ + if (sortedCnt == sortedBBs.size()) { + if (!done) { + done = true; + } else { + LogInfo::MapleLogger() << "Error: RA BFS loop " << sortedCnt << " in func " << cgFunc->GetName() << "\n"; + } + } + sortedCnt = sortedBBs.size(); + } while (changed); + + for (BB *bb = cleanupBB; bb != nullptr; bb = bb->GetNext()) { + sortedBBs.push_back(bb); + } +} + +uint32 AArch64RegAllocator::GetRegLivenessId(Operand *opnd) { + auto regIt = regLiveness.find(opnd); + return ((regIt == regLiveness.end()) ? 0 : regIt->second); +} + +void AArch64RegAllocator::SetupRegLiveness(BB *bb) { + regLiveness.clear(); + + uint32 id = 1; + FOR_BB_INSNS_REV(insn, bb) { + if (!insn->IsMachineInstruction()) { + continue; + } + insn->SetId(id); + id++; + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; i++) { + Operand &opnd = insn->GetOperand(i); + AArch64OpndProp *aarch64Opndprop = static_cast(md->operand[i]); + if (!aarch64Opndprop->IsRegDef()) { + continue; + } + if (opnd.IsRegister()) { + regLiveness[&opnd] = insn->GetId(); + } + } + } +} + +bool DefaultO0RegAllocator::AllocateRegisters() { + InitAvailReg(); + PreAllocate(); + cgFunc->SetIsAfterRegAlloc(); + + auto *a64CGFunc = static_cast(cgFunc); + /* + * we store both FP/LR if using FP or if not using FP, but func has a call + * Using FP, record it for saving + */ + a64CGFunc->AddtoCalleeSaved(RFP); + a64CGFunc->AddtoCalleeSaved(RLR); + a64CGFunc->NoteFPLRAddedToCalleeSavedList(); + + FOR_ALL_BB_REV(bb, a64CGFunc) { + if (bb->IsEmpty()) { + continue; + } + + SetupRegLiveness(bb); + FOR_BB_INSNS_REV(insn, bb) { + if (!insn->IsMachineInstruction()) { + continue; + } + + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + + if (md->IsCall() && (insn->GetMachineOpcode() != MOP_clinit)) { + AllocHandleCallee(*insn, *md); + continue; + } + + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { /* the dest registers */ + Operand &opnd = insn->GetOperand(i); + if (!static_cast(md->operand[i])->IsRegDef()) { + continue; + } + if (allocatedSet.find(&opnd) != allocatedSet.end()) { + /* free the live range of this register */ + auto ®Opnd = static_cast(opnd); + SaveCalleeSavedReg(regOpnd); + if (insn->IsAtomicStore() || insn->IsSpecialIntrinsic()) { + /* remember the physical machine register assigned */ + regno_t regNO = regOpnd.GetRegisterNumber(); + rememberRegs.push_back(static_cast(regOpnd.IsVirtualRegister() ? 
regMap[regNO] : regNO)); + } else if (!insn->IsCondDef()) { + uint32 id = GetRegLivenessId(®Opnd); + if (id && (id <= insn->GetId())) { + ReleaseReg(regOpnd); + } + } + insn->SetOperand(i, a64CGFunc->GetOrCreatePhysicalRegisterOperand( + regMap[regOpnd.GetRegisterNumber()], regOpnd.GetSize(), regOpnd.GetRegisterType())); + continue; /* already allocated */ + } + + if (opnd.IsRegister()) { + insn->SetOperand(static_cast(i), *AllocDestOpnd(opnd, *insn)); + SaveCalleeSavedReg(static_cast(opnd)); + } + } + + for (uint32 i = 0; i < opndNum; ++i) { /* the src registers */ + Operand &opnd = insn->GetOperand(i); + if (!(static_cast(md->operand[i])->IsRegUse() || opnd.GetKind() == Operand::kOpdMem)) { + continue; + } + if (allocatedSet.find(&opnd) != allocatedSet.end() && opnd.IsRegister()) { + auto ®Opnd = static_cast(opnd); + AArch64reg reg = regMap[regOpnd.GetRegisterNumber()]; + availRegSet[reg] = false; + (void)liveReg.insert(reg); /* this register is live now */ + insn->SetOperand(i, a64CGFunc->GetOrCreatePhysicalRegisterOperand(reg, regOpnd.GetSize(), + regOpnd.GetRegisterType())); + } else { + Operand *srcOpnd = AllocSrcOpnd(opnd, md->operand[i]); + CHECK_NULL_FATAL(srcOpnd); + insn->SetOperand(i, *srcOpnd); + } + } + /* hack. a better way to handle intrinsics? */ + for (auto rememberReg : rememberRegs) { + ASSERT(rememberReg != kRinvalid, "not a valid register"); + ReleaseReg(rememberReg); + } + rememberRegs.clear(); + } + } + cgFunc->SetIsAfterRegAlloc(); + return true; +} + +AnalysisResult *CgDoRegAlloc::Run(CGFunc *cgFunc, CgFuncResultMgr *cgFuncResultMgr) { + MemPool *phaseMp = NewMemPool(); + LiveAnalysis *live = nullptr; + /* It doesn't need live range information when -O1, because the register will not live out of bb. */ + if (Globals::GetInstance()->GetOptimLevel() >= 1) { + live = static_cast(cgFuncResultMgr->GetAnalysisResult(kCGFuncPhaseLIVE, cgFunc)); + CHECK_FATAL(live != nullptr, "null ptr check"); + /* revert liveanalysis result container. */ + live->ResetLiveSet(); + } + + RegAllocator *regAllocator = nullptr; + if (Globals::GetInstance()->GetOptimLevel() == 0) { + regAllocator = phaseMp->New(*cgFunc, *phaseMp); + } else { + if (cgFunc->GetCG()->GetCGOptions().DoLinearScanRegisterAllocation()) { + regAllocator = phaseMp->New(*cgFunc, *phaseMp); + } else if (cgFunc->GetCG()->GetCGOptions().DoColoringBasedRegisterAllocation()) { + regAllocator = phaseMp->New(*cgFunc, *phaseMp); + } else { + maple::LogInfo::MapleLogger(kLlErr) << "Warning: We only support Linear Scan and GraphColor register allocation\n"; + } + } + + CHECK_FATAL(regAllocator != nullptr, "regAllocator is null in CgDoRegAlloc::Run"); + cgFuncResultMgr->GetAnalysisResult(kCGFuncPhaseLOOP, cgFunc); + cgFunc->SetIsAfterRegAlloc(); + regAllocator->AllocateRegisters(); + /* the live range info may changed, so invalid the info. */ + if (live != nullptr) { + live->ClearInOutDataInfo(); + } + cgFuncResultMgr->InvalidAnalysisResult(kCGFuncPhaseLIVE, cgFunc); + cgFuncResultMgr->InvalidAnalysisResult(kCGFuncPhaseLOOP, cgFunc); + return nullptr; +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_schedule.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_schedule.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f51e4e2015d77670ac846a402579b440913df520 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_schedule.cpp @@ -0,0 +1,1200 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. 
+ * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "riscv64_schedule.h" +#include +#include "riscv64_cg.h" +#include "riscv64_operand.h" +#include "riscv64_dependence.h" +#include "pressure.h" + +/* + * This phase is Instruction Scheduling. + * There is a local list scheduling, it is scheduling in basic block. + * The entry is AArch64Schedule::ListScheduling, will traversal all basic block, + * for a basic block: + * 1. build a dependence graph; + * 2. combine clinit pairs and str&ldr pairs; + * 3. reorder instructions. + */ +namespace maplebe { +namespace { +constexpr uint32 kClinitAdvanceCycle = 10; +constexpr uint32 kAdrpLdrAdvanceCycle = 2; +constexpr uint32 kClinitTailAdvanceCycle = 4; +} + +uint32 AArch64Schedule::maxUnitIndex = 0; +/* Init schedule's data struction. */ +void AArch64Schedule::Init() { + readyList.clear(); + nodeSize = nodes.size(); + lastSeparatorIndex = 0; + mad->ReleaseAllUnits(); + DepNode *node = nodes[0]; + + ASSERT(node->GetType() == kNodeTypeSeparator, "CG internal error, the first node should be a separator node."); + + if (CGOptions::IsDruteForceSched() || CGOptions::IsSimulateSched()) { + for (auto nodeTemp : nodes) { + nodeTemp->SetVisit(0); + nodeTemp->SetState(kNormal); + nodeTemp->SetSchedCycle(0); + nodeTemp->SetEStart(0); + nodeTemp->SetLStart(0); + } + } + + readyList.emplace_back(node); + node->SetState(kReady); + + /* Init validPredsSize and validSuccsSize. */ + for (auto nodeTemp : nodes) { + nodeTemp->SetValidPredsSize(nodeTemp->GetPreds().size()); + nodeTemp->SetValidSuccsSize(nodeTemp->GetSuccs().size()); + } +} + +/* + * A insn which can be combine should meet this conditions: + * 1. it is str/ldr insn; + * 2. address mode is kAddrModeBOi, [baseReg, offset]; + * 3. the register operand size equal memory operand size; + * 4. if define USE_32BIT_REF, register operand size should be 4 byte; + * 5. for stp/ldp, the imm should be within -512 and 504(64bit), or -256 and 252(32bit); + * 6. pair instr for 8/4 byte registers must have multiple of 8/4 for imm. + * If insn can be combine, return true. 
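+ * For example, ldr x1, [x0, #8] and ldr x2, [x0, #16] both satisfy these rules and may later be
+ * merged into ldp x1, x2, [x0, #8] by MemoryAccessPairOpt.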
+ */ +bool AArch64Schedule::CanCombine(const Insn &insn) const { + MOperator opCode = insn.GetMachineOpcode(); + if ((opCode != MOP_xldr) && (opCode != MOP_wldr) && (opCode != MOP_dldr) && (opCode != MOP_sldr) && + (opCode != MOP_xstr) && (opCode != MOP_wstr) && (opCode != MOP_dstr) && (opCode != MOP_sstr)) { + return false; + } + + ASSERT(insn.GetOperand(1).IsMemoryAccessOperand(), "expects mem operands"); + auto &memOpnd = static_cast(insn.GetOperand(1)); + AArch64MemOperand::AArch64AddressingMode addrMode = memOpnd.GetAddrMode(); + if ((addrMode != AArch64MemOperand::kAddrModeBOi) || !memOpnd.IsIntactIndexed()) { + return false; + } + + auto ®Opnd = static_cast(insn.GetOperand(0)); + if (regOpnd.GetSize() != memOpnd.GetSize()) { + return false; + } + + uint32 size = regOpnd.GetSize() >> kLog2BitsPerByte; +#ifdef USE_32BIT_REF + if (insn.IsAccessRefField() && (size > (kIntregBytelen >> 1))) { + return false; + } +#endif /* USE_32BIT_REF */ + + AArch64OfstOperand *offset = memOpnd.GetOffsetImmediate(); + if (offset == nullptr) { + return false; + } + int32 offsetValue = offset->GetOffsetValue(); + if (size == kIntregBytelen) { /* 64 bit */ + if ((offsetValue <= kStpLdpImm64LowerBound) || (offsetValue >= kStpLdpImm64UpperBound)) { + return false; + } + } else if (size == (kIntregBytelen >> 1)) { /* 32 bit */ + if ((offsetValue <= kStpLdpImm32LowerBound) || (offsetValue >= kStpLdpImm32UpperBound)) { + return false; + } + } + + /* pair instr for 8/4 byte registers must have multiple of 8/4 for imm */ + if ((static_cast(offsetValue) % size) != 0) { + return false; + } + return true; +} + +/* After building dependence graph, combine str&ldr pairs. */ +void AArch64Schedule::MemoryAccessPairOpt() { + Init(); + std::vector memList; + + while ((!readyList.empty()) || !memList.empty()) { + DepNode *readNode = nullptr; + if (!readyList.empty()) { + readNode = readyList[0]; + readyList.erase(readyList.begin()); + } else { + if (memList[0]->GetType() != kNodeTypeEmpty) { + FindAndCombineMemoryAccessPair(memList); + } + readNode = memList[0]; + memList.erase(memList.begin()); + } + + /* schedule readNode */ + CHECK_FATAL(readNode != nullptr, "readNode is null in MemoryAccessPairOpt"); + readNode->SetState(kScheduled); + + /* add readNode's succs to readyList or memList. */ + for (auto succLink : readNode->GetSuccs()) { + DepNode &succNode = succLink->GetTo(); + succNode.DescreaseValidPredsSize(); + if (succNode.GetValidPredsSize() == 0) { + ASSERT(succNode.GetState() == kNormal, "schedule state should be kNormal"); + succNode.SetState(kReady); + ASSERT(succNode.GetInsn() != nullptr, "insn can't be nullptr!"); + if (CanCombine(*succNode.GetInsn())) { + memList.emplace_back(&succNode); + } else { + readyList.emplace_back(&succNode); + } + } + } + } + + for (auto node : nodes) { + node->SetVisit(0); + node->SetState(kNormal); + } +} + +/* Find and combine correct MemoryAccessPair for memList[0]. 
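+ * A candidate must have the same opcode, base register and access size as memList[0], and its offset must
+ * differ from memList[0]'s by exactly that access size; for loads the two destination registers must differ.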
*/ +void AArch64Schedule::FindAndCombineMemoryAccessPair(const std::vector &memList) { + ASSERT(!memList.empty(), "memList should not be empty"); + CHECK_FATAL(memList[0]->GetInsn() != nullptr, "memList[0]'s insn should not be nullptr"); + AArch64MemOperand *currMemOpnd = static_cast(memList[0]->GetInsn()->GetMemOpnd()); + ASSERT(currMemOpnd != nullptr, "opnd should not be nullptr"); + ASSERT(currMemOpnd->IsMemoryAccessOperand(), "opnd should be memOpnd"); + int32 currOffsetVal = currMemOpnd->GetOffsetImmediate()->GetOffsetValue(); + MOperator currMop = memList[0]->GetInsn()->GetMachineOpcode(); + /* find a depNode to combine with memList[0], and break; */ + for (auto it = std::next(memList.begin(), 1); it != memList.end(); ++it) { + ASSERT((*it)->GetInsn() != nullptr, "null ptr check"); + + if (currMop == (*it)->GetInsn()->GetMachineOpcode()) { + AArch64MemOperand *nextMemOpnd = static_cast((*it)->GetInsn()->GetMemOpnd()); + CHECK_FATAL(nextMemOpnd != nullptr, "opnd should not be nullptr"); + CHECK_FATAL(nextMemOpnd->IsMemoryAccessOperand(), "opnd should be MemOperand"); + int32 nextOffsetVal = nextMemOpnd->GetOffsetImmediate()->GetOffsetValue(); + uint32 size = currMemOpnd->GetSize() >> kLog2BitsPerByte; + if ((nextMemOpnd->GetBaseRegister() == currMemOpnd->GetBaseRegister()) && + (nextMemOpnd->GetSize() == currMemOpnd->GetSize()) && + (static_cast(abs(nextOffsetVal - currOffsetVal)) == size)) { + /* + * In ARM Architecture Reference Manual ARMv8, for ARMv8-A architecture profile + * LDP on page K1-6125 declare that ldp can't use same reg + */ + if (((currMop == MOP_xldr) || (currMop == MOP_sldr) || (currMop == MOP_dldr) || (currMop == MOP_wldr)) && + &(memList[0]->GetInsn()->GetOperand(0)) == &((*it)->GetInsn()->GetOperand(0))) { + continue; + } + + if (LIST_SCHED_DUMP_REF) { + LogInfo::MapleLogger() << "Combine inse: " << "\n"; + memList[0]->GetInsn()->Dump(); + (*it)->GetInsn()->Dump(); + } + depAnalysis->CombineMemoryAccessPair(*memList[0], **it, nextOffsetVal > currOffsetVal); + if (LIST_SCHED_DUMP_REF) { + LogInfo::MapleLogger() << "To: " << "\n"; + memList[0]->GetInsn()->Dump(); + } + break; + } + } + } +} + +/* combine clinit pairs. */ +void AArch64Schedule::ClinitPairOpt() { + for (auto it = nodes.begin(); it != nodes.end(); ++it) { + auto nextIt = std::next(it, 1); + if (nextIt == nodes.end()) { + return; + } + + if ((*it)->GetInsn()->GetMachineOpcode() == MOP_adrp_ldr) { + if ((*nextIt)->GetInsn()->GetMachineOpcode() == MOP_clinit_tail) { + depAnalysis->CombineClinit(**it, **(nextIt), false); + } else if ((*nextIt)->GetType() == kNodeTypeSeparator) { + nextIt = std::next(nextIt, 1); + if (nextIt == nodes.end()) { + return; + } + if ((*nextIt)->GetInsn()->GetMachineOpcode() == MOP_clinit_tail) { + /* Do something. */ + depAnalysis->CombineClinit(**it, **(nextIt), true); + } + } + } + } +} + +/* Return the next node's index who is kNodeTypeSeparator. */ +uint32 AArch64Schedule::GetNextSepIndex() const { + return ((lastSeparatorIndex + kMaxDependenceNum) < nodeSize) ? (lastSeparatorIndex + kMaxDependenceNum) + : (nodes.size() - 1); +} + +/* Do register pressure schduling. 
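+ * This reorders the nodes of one BB with the aim of keeping fewer int/float registers live at the same time,
+ * based on the physical register counts set up below.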
*/ +void AArch64Schedule::RegPressureScheduling(BB &bb, MapleVector &nodes) { + RegPressureSchedule *regSchedule = memPool.New(cgFunc, alloc); + /* + * Get physical register amount currently + * undef, Int Reg, Float Reg, Flag Reg + */ + const std::vector kRegNumVec = { 0, V0, kMaxRegNum - V0 + 1, 1 }; + regSchedule->InitBBInfo(bb, memPool, nodes); + regSchedule->BuildPhyRegInfo(kRegNumVec); + regSchedule->DoScheduling(nodes); +} + +/* + * Compute earliest start of the node, + * return value : the maximum estart. + */ +uint32 AArch64Schedule::ComputeEstart(uint32 cycle) { + std::vector readyNodes; + uint32 maxIndex = GetNextSepIndex(); + + if (CGOptions::IsDebugSched()) { + /* Check validPredsSize. */ + for (uint32 i = lastSeparatorIndex; i <= maxIndex; ++i) { + DepNode *node = nodes[i]; + int32 schedNum = 0; + for (const auto *predLink : node->GetPreds()) { + if (predLink->GetFrom().GetState() == kScheduled) { + ++schedNum; + } + } + ASSERT((node->GetPreds().size() - schedNum) == node->GetValidPredsSize(), "validPredsSize error."); + } + } + + ASSERT(nodes[maxIndex]->GetType() == kNodeTypeSeparator, + "CG internal error, nodes[maxIndex] should be a separator node."); + + (void)readyNodes.insert(readyNodes.begin(), readyList.begin(), readyList.end()); + + uint32 maxEstart = cycle; + for (uint32 i = lastSeparatorIndex; i <= maxIndex; ++i) { + DepNode *node = nodes[i]; + node->SetVisit(0); + } + + for (auto *node : readyNodes) { + ASSERT(node->GetState() == kReady, "CG internal error, all nodes in ready list should be ready."); + if (node->GetEStart() < cycle) { + node->SetEStart(cycle); + } + } + + while (!readyNodes.empty()) { + DepNode *node = readyNodes.front(); + readyNodes.erase(readyNodes.begin()); + + for (const auto *succLink : node->GetSuccs()) { + DepNode &succNode = succLink->GetTo(); + if (succNode.GetType() == kNodeTypeSeparator) { + continue; + } + + if (succNode.GetEStart() < (node->GetEStart() + succLink->GetLatency())) { + succNode.SetEStart(node->GetEStart() + succLink->GetLatency()); + } + maxEstart = (maxEstart < succNode.GetEStart() ? succNode.GetEStart() : maxEstart); + succNode.IncreaseVisit(); + if ((succNode.GetVisit() >= succNode.GetValidPredsSize()) && (succNode.GetType() != kNodeTypeSeparator)) { + readyNodes.emplace_back(&succNode); + } + ASSERT(succNode.GetVisit() <= succNode.GetValidPredsSize(), "CG internal error."); + } + } + + return maxEstart; +} + +/* Compute latest start of the node. 
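+ * Every node between the current and the next separator starts with lstart = maxEstart and is then tightened to
+ *   lstart(n) = min over successors s of (lstart(s) - latency(n -> s)),
+ * the mirror of ComputeEstart, which propagates estart(s) = max over predecessors p of (estart(p) + latency(p -> s)).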
*/ +void AArch64Schedule::ComputeLstart(uint32 maxEstart) { + /* std::vector is better than std::queue in run time */ + std::vector readyNodes; + uint32 maxIndex = GetNextSepIndex(); + + ASSERT(nodes[maxIndex]->GetType() == kNodeTypeSeparator, + "CG internal error, nodes[maxIndex] should be a separator node."); + + for (uint32 i = lastSeparatorIndex; i <= maxIndex; ++i) { + DepNode *node = nodes[i]; + node->SetLStart(maxEstart); + node->SetVisit(0); + } + + readyNodes.emplace_back(nodes[maxIndex]); + while (!readyNodes.empty()) { + DepNode *node = readyNodes.front(); + readyNodes.erase(readyNodes.begin()); + for (const auto *predLink : node->GetPreds()) { + DepNode &predNode = predLink->GetFrom(); + if (predNode.GetState() == kScheduled) { + continue; + } + + if (predNode.GetLStart() > (node->GetLStart() - predLink->GetLatency())) { + predNode.SetLStart(node->GetLStart() - predLink->GetLatency()); + } + predNode.IncreaseVisit(); + if ((predNode.GetVisit() >= predNode.GetValidSuccsSize()) && (predNode.GetType() != kNodeTypeSeparator)) { + readyNodes.emplace_back(&predNode); + } + + ASSERT(predNode.GetVisit() <= predNode.GetValidSuccsSize(), "CG internal error."); + } + } +} + +/* Compute earliest start and latest start of the node that is in readyList and not be scheduled. */ +void AArch64Schedule::UpdateELStartsOnCycle(uint32 cycle) { + ComputeLstart(ComputeEstart(cycle)); +} + +/* + * If all unit of this node need when it be scheduling is free, this node can be scheduled, + * Return true. + */ +bool DepNode::CanBeScheduled() const { + for (uint32 i = 0; i < unitNum; ++i) { + Unit *unit = units[i]; + if (unit != nullptr) { + if (!unit->IsFree(i)) { + return false; + } + } + } + + return true; +} + +/* Mark those unit that this node need occupy unit when it is being scheduled. */ +void DepNode::OccupyUnits() { + for (uint32 i = 0; i < unitNum; ++i) { + Unit *unit = units[i]; + if (unit != nullptr) { + unit->Occupy(*insn, i); + } + } +} + +/* Get unit kind of this node's units[0]. */ +uint32 DepNode::GetUnitKind() const { + uint32 retValue = 0; + if ((units == nullptr) || (units[0] == nullptr)) { + return retValue; + } + + switch (units[0]->GetUnitId()) { + case kUnitIdSlotD: + retValue |= kUnitKindSlot0; + break; + case kUnitIdAgen: + case kUnitIdSlotSAgen: + retValue |= kUnitKindAgen; + break; + case kUnitIdSlotDAgen: + retValue |= kUnitKindAgen; + retValue |= kUnitKindSlot0; + break; + case kUnitIdHazard: + case kUnitIdSlotSHazard: + retValue |= kUnitKindHazard; + break; + case kUnitIdCrypto: + retValue |= kUnitKindCrypto; + break; + case kUnitIdMul: + case kUnitIdSlotSMul: + retValue |= kUnitKindMul; + break; + case kUnitIdDiv: + retValue |= kUnitKindDiv; + break; + case kUnitIdBranch: + case kUnitIdSlotSBranch: + retValue |= kUnitKindBranch; + break; + case kUnitIdStAgu: + retValue |= kUnitKindStAgu; + break; + case kUnitIdLdAgu: + retValue |= kUnitKindLdAgu; + break; + case kUnitIdFpAluS: + case kUnitIdFpAluD: + retValue |= kUnitKindFpAlu; + break; + case kUnitIdFpMulS: + case kUnitIdFpMulD: + retValue |= kUnitKindFpMul; + break; + case kUnitIdFpDivS: + case kUnitIdFpDivD: + retValue |= kUnitKindFpDiv; + break; + case kUnitIdSlot0LdAgu: + retValue |= kUnitKindSlot0; + retValue |= kUnitKindLdAgu; + break; + case kUnitIdSlot0StAgu: + retValue |= kUnitKindSlot0; + retValue |= kUnitKindStAgu; + break; + default: + break; + } + + return retValue; +} + +/* Count unit kinds to an array. Each element of the array indicates the unit kind number of a node set. 
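+ * The array is indexed by the 1-based bit position returned by __builtin_ffs, so array[i] ends up holding
+ * how many of the counted nodes use the unit kind whose flag is bit (i - 1).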
*/ +void AArch64Schedule::CountUnitKind(const DepNode &depNode, uint32 array[], const uint32 arraySize) const { + (void)arraySize; + ASSERT(arraySize >= kUnitKindLast, "CG internal error. unit kind number is not correct."); + uint32 unitKind = depNode.GetUnitKind(); + int32 index = __builtin_ffs(unitKind); + while (index) { + ASSERT(index < kUnitKindLast, "CG internal error. index error."); + ++array[index]; + unitKind &= ~(1u << (index - 1u)); + index = __builtin_ffs(unitKind); + } +} + +/* Check if a node use a specific unit kind. */ +bool AArch64Schedule::IfUseUnitKind(const DepNode &depNode, uint32 index) { + uint32 unitKind = depNode.GetUnitKind(); + int32 idx = __builtin_ffs(unitKind); + while (idx) { + ASSERT(index < kUnitKindLast, "CG internal error. index error."); + if (idx == index) { + return true; + } + unitKind &= ~(1u << (idx - 1u)); + idx = __builtin_ffs(unitKind); + } + + return false; +} + +/* A sample schedule according dependence graph only, to verify correctness of dependence graph. */ +void AArch64Schedule::RandomTest() { + Init(); + nodes.clear(); + + while (!readyList.empty()) { + DepNode *currNode = readyList.back(); + currNode->SetState(kScheduled); + readyList.pop_back(); + nodes.emplace_back(currNode); + + for (auto succLink : currNode->GetSuccs()) { + DepNode &succNode = succLink->GetTo(); + bool ready = true; + for (auto predLink : succNode.GetPreds()) { + DepNode &predNode = predLink->GetFrom(); + if (predNode.GetState() != kScheduled) { + ready = false; + break; + } + } + + if (ready) { + ASSERT(succNode.GetState() == kNormal, "succNode must be kNormal"); + readyList.emplace_back(&succNode); + succNode.SetState(kReady); + } + } + } +} + +/* Remove target from readyList. */ +void AArch64Schedule::EraseNodeFromReadyList(const DepNode &target) { + EraseNodeFromNodeList(target, readyList); +} + +/* Remove target from nodeList. */ +void AArch64Schedule::EraseNodeFromNodeList(const DepNode &target, MapleVector &nodeList) { + for (auto it = nodeList.begin(); it != nodeList.end(); ++it) { + if ((*it) == &target) { + nodeList.erase(it); + return; + } + } + + ASSERT(false, "CG internal error, erase node fail."); +} + +/* Dump all node of availableReadyList schedule information in current cycle. */ +void AArch64Schedule::DumpDebugInfo(const ScheduleProcessInfo &scheduleInfo) { + LogInfo::MapleLogger() << "Current cycle[ " << scheduleInfo.GetCurrCycle() << " ], Available in readyList is : \n"; + for (auto node : scheduleInfo.GetAvailableReadyList()) { + LogInfo::MapleLogger() << "NodeIndex[ " << node->GetIndex() + << " ], Estart[ " << node->GetEStart() << " ], Lstart[ "; + LogInfo::MapleLogger() << node->GetLStart() << " ], slot[ "; + LogInfo::MapleLogger() << + (node->GetReservation() == nullptr ? "SlotNone" : node->GetReservation()->GetSlotName()) << " ], "; + LogInfo::MapleLogger() << "succNodeNum[ " << node->GetSuccs().size() << " ], "; + node->GetInsn()->Dump(); + LogInfo::MapleLogger() << '\n'; + } +} + +/* + * Select a node from availableReadyList according to some heuristic rules, then: + * 1. change targetNode's schedule information; + * 2. try to add successors of targetNode to readyList; + * 3. update unscheduled node set, when targetNode is last kNodeTypeSeparator; + * 4. update AdvanceCycle. 
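+ *
+ * The heuristic rules live in CompareDepNode below: smaller lstart first, then
+ * nodes using the most contended unit kind (maxUnitIndex), then slot0 before
+ * slot1, then more successors, and finally the original insn id as tie breaker.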
+ */ +void AArch64Schedule::SelectNode(ScheduleProcessInfo &scheduleInfo) { + auto &availableReadyList = scheduleInfo.GetAvailableReadyList(); + auto it = availableReadyList.begin(); + DepNode *targetNode = *it; + if (availableReadyList.size() > 1) { + CalculateMaxUnitKindCount(scheduleInfo); + ++it; + for (; it != availableReadyList.end(); ++it) { + if (CompareDepNode(**it, *targetNode)) { + targetNode = *it; + } + } + } + scheduleInfo.PushElemIntoScheduledNodes(targetNode); + EraseNodeFromReadyList(*targetNode); + + if (CGOptions::IsDebugSched()) { + LogInfo::MapleLogger() << "TargetNode : "; + targetNode->GetInsn()->Dump(); + LogInfo::MapleLogger() << "\n"; + } + + /* Update readyList. */ + UpdateReadyList(*targetNode, readyList, true); + + if (targetNode->GetType() == kNodeTypeSeparator) { + /* If target node is separator node, update lastSeparatorIndex and calculate those depNodes's estart and lstart + * between current separator node and new Separator node. + */ + if (!scheduleInfo.IsFirstSeparator()) { + lastSeparatorIndex += kMaxDependenceNum; + UpdateELStartsOnCycle(scheduleInfo.GetCurrCycle()); + } else { + scheduleInfo.ResetIsFirstSeparator(); + } + } + + switch (targetNode->GetInsn()->GetLatencyType()) { + case kLtClinit: + scheduleInfo.SetAdvanceCycle(kClinitAdvanceCycle); + break; + case kLtAdrpLdr: + scheduleInfo.SetAdvanceCycle(kAdrpLdrAdvanceCycle); + break; + case kLtClinitTail: + scheduleInfo.SetAdvanceCycle(kClinitTailAdvanceCycle); + break; + default: + break; + } + + if ((scheduleInfo.GetAdvanceCycle() == 0) && mad->IsFullIssued()) { + scheduleInfo.SetAdvanceCycle(1); + } +} + +/* + * Advance mad's cycle until info's advanceCycle equal zero, + * and then clear info's availableReadyList. + */ +void AArch64Schedule::UpdateScheduleProcessInfo(ScheduleProcessInfo &info) { + while (info.GetAdvanceCycle() > 0) { + info.IncCurrCycle(); + mad->AdvanceCycle(); + info.DecAdvanceCycle(); + } + info.ClearAvailableReadyList(); +} + +/* + * Forward traversal readyList, if a node in readyList can be Schedule, add it to availableReadyList. + * Return true, if availableReadyList is not empty. + */ +bool AArch64Schedule::CheckSchedulable(ScheduleProcessInfo &info) const { + for (auto node : readyList) { + if (node->CanBeScheduled() && node->GetEStart() <= info.GetCurrCycle()) { + info.PushElemIntoAvailableReadyList(node); + } + } + + if (info.AvailableReadyListIsEmpty()) { + return false; + } + return true; +} + +/* After building dependence graph, schedule insns. */ +uint32 AArch64Schedule::DoSchedule() { + ScheduleProcessInfo scheduleInfo(nodeSize); + Init(); + UpdateELStartsOnCycle(scheduleInfo.GetCurrCycle()); + + while (!readyList.empty()) { + UpdateScheduleProcessInfo(scheduleInfo); + /* Check if schedulable */ + if (!CheckSchedulable(scheduleInfo)) { + /* Advance cycle. 
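+ * No ready node can issue in this cycle (its estart is still in the future or
+ * its functional units are busy), so advance the machine model by one cycle
+ * and retry.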
*/ + scheduleInfo.SetAdvanceCycle(1); + continue; + } + + if (scheduleInfo.GetLastUpdateCycle() < scheduleInfo.GetCurrCycle()) { + scheduleInfo.SetLastUpdateCycle(scheduleInfo.GetCurrCycle()); + } + + if (CGOptions::IsDebugSched()) { + DumpDebugInfo(scheduleInfo); + } + + /* Select a node to scheduling */ + SelectNode(scheduleInfo); + } + + ASSERT(scheduleInfo.SizeOfScheduledNodes() == nodes.size(), "CG internal error, Not all nodes scheduled."); + + nodes.clear(); + (void)nodes.insert(nodes.begin(), scheduleInfo.GetScheduledNodes().begin(), scheduleInfo.GetScheduledNodes().end()); + /* the second to last node is the true last node, because the last is kNodeTypeSeparator node */ + ASSERT(nodes.size() - 2 >= 0, "size of nodes should be greater than or equal 2"); + return (nodes[nodes.size() - 2]->GetSchedCycle()); +} + +/* + * Comparing priorities of node1 and node2 according to some heuristic rules + * return true if node1's priority is higher + */ +bool AArch64Schedule::CompareDepNode(const DepNode &node1, const DepNode &node2) { + /* less LStart first */ + if (node1.GetLStart() != node2.GetLStart()) { + return node1.GetLStart() < node2.GetLStart(); + } + + /* max unit kind use */ + bool use1 = IfUseUnitKind(node1, maxUnitIndex); + bool use2 = IfUseUnitKind(node2, maxUnitIndex); + if (use1 != use2) { + return use1; + } + + /* slot0 first */ + SlotType slotType1 = node1.GetReservation()->GetSlot(); + SlotType slotType2 = node2.GetReservation()->GetSlot(); + if (slotType1 == kSlots) { + slotType1 = kSlot0; + } + if (slotType2 == kSlots) { + slotType2 = kSlot0; + } + if (slotType1 != slotType2) { + return slotType1 < slotType2; + } + + /* more succNodes fisrt */ + if (node1.GetSuccs().size() != node2.GetSuccs().size()) { + return node1.GetSuccs().size() > node2.GetSuccs().size(); + } + + /* default order */ + return node1.GetInsn()->GetId() < node2.GetInsn()->GetId(); +} + +/* + * Calculate number of every unit that used by avaliableReadyList's nodes and save the max in maxUnitIndex + */ +void AArch64Schedule::CalculateMaxUnitKindCount(ScheduleProcessInfo &scheduleInfo) { + uint32 unitKindCount[kUnitKindLast] = { 0 }; + for (auto node : scheduleInfo.GetAvailableReadyList()) { + CountUnitKind(*node, unitKindCount, kUnitKindLast); + } + + uint32 maxCount = 0; + maxUnitIndex = 0; + for (size_t i = 1; i < kUnitKindLast; ++i) { + if (maxCount < unitKindCount[i]) { + maxCount = unitKindCount[i]; + maxUnitIndex = i; + } + } +} + +/* + * A simulated schedule: + * scheduling instruction in original order to calculate original execute cycles. + */ +uint32 AArch64Schedule::SimulateOnly() { + uint32 currCycle = 0; + uint32 advanceCycle = 0; + Init(); + + for (uint32 i = 0; i < nodes.size();) { + while (advanceCycle > 0) { + ++currCycle; + mad->AdvanceCycle(); + --advanceCycle; + } + + DepNode *targetNode = nodes[i]; + if ((currCycle >= targetNode->GetEStart()) && targetNode->CanBeScheduled()) { + targetNode->SetSimulateCycle(currCycle); + targetNode->OccupyUnits(); + + /* Update estart. 
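+ * For every successor of the insn just issued, the loop below applies
+ *   estart(succ) = max(estart(succ), currCycle + latency(edge)).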
*/ + for (auto succLink : targetNode->GetSuccs()) { + DepNode &succNode = succLink->GetTo(); + uint32 eStart = currCycle + succLink->GetLatency(); + if (succNode.GetEStart() < eStart) { + succNode.SetEStart(eStart); + } + } + + if (CGOptions::IsDebugSched()) { + LogInfo::MapleLogger() << "[Simulate] TargetNode : "; + targetNode->GetInsn()->Dump(); + LogInfo::MapleLogger() << "\n"; + } + + switch (targetNode->GetInsn()->GetLatencyType()) { + case kLtClinit: + advanceCycle = kClinitAdvanceCycle; + break; + case kLtAdrpLdr: + advanceCycle = kAdrpLdrAdvanceCycle; + break; + case kLtClinitTail: + advanceCycle = kClinitTailAdvanceCycle; + break; + default: + break; + } + + ++i; + } else { + advanceCycle = 1; + } + } + /* the second to last node is the true last node, because the last is kNodeTypeSeparator nod */ + ASSERT(nodes.size() - 2 >= 0, "size of nodes should be greater than or equal 2"); + return (nodes[nodes.size() - 2]->GetSimulateCycle()); +} + +/* Restore dependence graph to normal CGIR. */ +void AArch64Schedule::FinalizeScheduling(BB &bb, const DepAnalysis &depAnalysis) { + bb.ClearInsns(); + + const Insn *prevLocInsn = (bb.GetPrev() != nullptr ? bb.GetPrev()->GetLastLoc() : nullptr); + for (auto node : nodes) { + /* Append comments first. */ + for (auto comment : node->GetComments()) { + bb.AppendInsn(*comment); + } + /* Append insn. */ + if (!node->GetClinitInsns().empty()) { + for (auto clinit : node->GetClinitInsns()) { + bb.AppendInsn(*clinit); + } + } else if (node->GetType() == kNodeTypeNormal) { + bb.AppendInsn(*node->GetInsn()); + } + + /* Append cfi instructions. */ + for (auto cfi : node->GetCfiInsns()) { + bb.AppendInsn(*cfi); + } + } + bb.SetLastLoc(prevLocInsn); + + for (auto lastComment : depAnalysis.GetLastComments()) { + bb.AppendInsn(*lastComment); + } +} + +/* For every node of nodes, update it's bruteForceSchedCycle. */ +void AArch64Schedule::UpdateBruteForceSchedCycle() { + for (auto node : nodes) { + node->SetBruteForceSchedCycle(node->GetSchedCycle()); + } +} + +/* Recursively schedule all of the possible node. */ +void AArch64Schedule::IterateBruteForce(DepNode &targetNode, MapleVector &readyList, uint32 currCycle, + MapleVector &scheduledNodes, uint32 &maxCycleCount, + MapleVector &optimizedScheduledNodes) { + /* Save states. */ + constexpr int32 unitSize = 31; + ASSERT(unitSize == mad->GetAllUnitsSize(), "CG internal error."); + std::vector occupyTable; + occupyTable.resize(unitSize, 0); + mad->SaveStates(occupyTable, unitSize); + + /* Schedule targetNode first. */ + targetNode.SetState(kScheduled); + targetNode.SetSchedCycle(currCycle); + scheduledNodes.emplace_back(&targetNode); + + MapleVector tempList = readyList; + EraseNodeFromNodeList(targetNode, tempList); + targetNode.OccupyUnits(); + + /* Update readyList. */ + UpdateReadyList(targetNode, tempList, true); + + if (targetNode.GetType() == kNodeTypeSeparator) { + /* If target node is separator node, update lastSeparatorIndex. 
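+ * It moves forward by kMaxDependenceNum here and is moved back by the same
+ * amount in the restore path at the end of this function (the dependence
+ * analysis is assumed to place separators at that spacing).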
*/ + lastSeparatorIndex += kMaxDependenceNum; + } + + if (tempList.empty()) { + ASSERT(scheduledNodes.size() == nodes.size(), "CG internal error, Not all nodes scheduled."); + if (currCycle < maxCycleCount) { + maxCycleCount = currCycle; + UpdateBruteForceSchedCycle(); + optimizedScheduledNodes = scheduledNodes; + } + } else { + uint32 advanceCycle = 0; + switch (targetNode.GetInsn()->GetLatencyType()) { + case kLtClinit: + advanceCycle = kClinitAdvanceCycle; + break; + case kLtAdrpLdr: + advanceCycle = kAdrpLdrAdvanceCycle; + break; + case kLtClinitTail: + advanceCycle = kClinitTailAdvanceCycle; + break; + default: + break; + } + + do { + std::vector availableReadyList; + std::vector tempAvailableList; + while (advanceCycle > 0) { + ++currCycle; + mad->AdvanceCycle(); + --advanceCycle; + } + /* Check EStart. */ + for (auto node : tempList) { + if (node->GetEStart() <= currCycle) { + tempAvailableList.emplace_back(node); + } + } + + if (tempAvailableList.empty()) { + /* Advance cycle. */ + advanceCycle = 1; + continue; + } + + /* Check if schedulable */ + for (auto node : tempAvailableList) { + if (node->CanBeScheduled()) { + availableReadyList.emplace_back(node); + } + } + + if (availableReadyList.empty()) { + /* Advance cycle. */ + advanceCycle = 1; + continue; + } + + for (auto node : availableReadyList) { + IterateBruteForce(*node, tempList, currCycle, scheduledNodes, maxCycleCount, optimizedScheduledNodes); + } + + break; + } while (true); + } + + /* + * Recover states. + * Restore targetNode first. + */ + targetNode.SetState(kReady); + targetNode.SetSchedCycle(0); + scheduledNodes.pop_back(); + mad->RestoreStates(occupyTable, unitSize); + + /* Update readyList. */ + for (auto succLink : targetNode.GetSuccs()) { + DepNode &succNode = succLink->GetTo(); + succNode.IncreaseValidPredsSize(); + succNode.SetState(kNormal); + } + + if (targetNode.GetType() == kNodeTypeSeparator) { + /* If target node is separator node, update lastSeparatorIndex. */ + lastSeparatorIndex -= kMaxDependenceNum; + } +} + +/* + * Brute force schedule: + * Finding all possibile schedule list of current bb, and calculate every list's execute cycles, + * return the optimal schedule list and it's cycles. + */ +uint32 AArch64Schedule::DoBruteForceSchedule() { + MapleVector scheduledNodes(alloc.Adapter()); + MapleVector optimizedScheduledNodes(alloc.Adapter()); + + uint32 currCycle = 0; + uint32 maxCycleCount = 0xFFFFFFFF; + Init(); + + /* Schedule First separator. */ + DepNode *targetNode = readyList.front(); + targetNode->SetState(kScheduled); + targetNode->SetSchedCycle(currCycle); + scheduledNodes.emplace_back(targetNode); + readyList.clear(); + + /* Update readyList. */ + UpdateReadyList(*targetNode, readyList, false); + + ASSERT(targetNode->GetType() == kNodeTypeSeparator, "The first node should be separator node."); + ASSERT(!readyList.empty(), "readyList should not be empty."); + + for (auto targetNodeTemp : readyList) { + IterateBruteForce(*targetNodeTemp, readyList, currCycle, scheduledNodes, maxCycleCount, optimizedScheduledNodes); + } + + nodes = optimizedScheduledNodes; + return maxCycleCount; +} + +/* + * Update ready list after the targetNode has been scheduled. + * For every targetNode's successor, if it's all predecessors have been scheduled, + * add it to ready list and update it's information (like state, estart). 
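+ * When updateEStart is set, the newly ready node is seeded with
+ *   estart(succ) = max over preds p of (schedCycle(p) + latency(p -> succ)),
+ * which matches the loop over GetPreds() in the body below.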
+ */ +void AArch64Schedule::UpdateReadyList(DepNode &targetNode, MapleVector &readyList, bool updateEStart) { + for (auto succLink : targetNode.GetSuccs()) { + DepNode &succNode = succLink->GetTo(); + succNode.DescreaseValidPredsSize(); + if (succNode.GetValidPredsSize() == 0) { + readyList.emplace_back(&succNode); + succNode.SetState(kReady); + + /* Set eStart. */ + if (updateEStart) { + uint32 maxEstart = 0; + for (auto predLink : succNode.GetPreds()) { + DepNode &predNode = predLink->GetFrom(); + uint32 eStart = predNode.GetSchedCycle() + predLink->GetLatency(); + maxEstart = (maxEstart < eStart ? eStart : maxEstart); + } + succNode.SetEStart(maxEstart); + } + } + } +} + +/* For every node of nodes, dump it's Depdence information. */ +void AArch64Schedule::DumpDepGraph(const MapleVector &nodes) const { + for (auto node : nodes) { + depAnalysis->DumpDepNode(*node); + LogInfo::MapleLogger() << "---------- preds ----------" << "\n"; + for (auto pred : node->GetPreds()) { + depAnalysis->DumpDepLink(*pred, &(pred->GetFrom())); + } + LogInfo::MapleLogger() << "---------- succs ----------" << "\n"; + for (auto succ : node->GetSuccs()) { + depAnalysis->DumpDepLink(*succ, &(succ->GetTo())); + } + LogInfo::MapleLogger() << "---------------------------" << "\n"; + } +} + +/* For every node of nodes, dump it's schedule time according simulate type and instruction information. */ +void AArch64Schedule::DumpScheduleResult(const MapleVector &nodes, SimulateType type) const { + for (auto node : nodes) { + LogInfo::MapleLogger() << "cycle[ "; + switch (type) { + case kListSchedule: + LogInfo::MapleLogger() << node->GetSchedCycle(); + break; + case kBruteForce: + LogInfo::MapleLogger() << node->GetBruteForceSchedCycle(); + break; + case kSimulateOnly: + LogInfo::MapleLogger() << node->GetSimulateCycle(); + break; + } + LogInfo::MapleLogger() << " ] "; + node->GetInsn()->Dump(); + LogInfo::MapleLogger() << "\n"; + } +} + +/* Print bb's dependence dot graph information to a file. 
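+ * The file can be rendered with graphviz (e.g. dot -Tpng <name>_dep_graph.dot).
+ * Each edge comes out roughly as (pointer values are illustrative):
+ *
+ *   insn0x1234 -> insn0x5678 [color=red,label= "2"];
+ *
+ * where red marks a true dependence and the label is the edge latency.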
*/ +void AArch64Schedule::GenerateDot(const BB &bb, const MapleVector &nodes) const { + std::streambuf *coutBuf = std::cout.rdbuf(); /* keep original cout buffer */ + std::ofstream dgFile; + std::streambuf *buf = dgFile.rdbuf(); + std::cout.rdbuf(buf); + + /* construct the file name */ + std::string fileName; + fileName.append(phaseName); + fileName.append("_"); + fileName.append(cgFunc.GetName()); + fileName.append("_BB"); + auto str = std::to_string(bb.GetId()); + fileName.append(str); + fileName.append("_dep_graph.dot"); + + dgFile.open(fileName.c_str(), std::ios::trunc); + if (!dgFile.is_open()) { + LogInfo::MapleLogger(kLlWarn) << "fileName:" << fileName << " open failure.\n"; + return; + } + dgFile << "digraph {\n"; + for (auto node : nodes) { + for (auto succ : node->GetSuccs()) { + dgFile << "insn" << node->GetInsn() << " -> " << "insn" << succ->GetTo().GetInsn(); + dgFile << " ["; + if (succ->GetDepType() == kDependenceTypeTrue) { + dgFile << "color=red,"; + } + dgFile << "label= \"" << succ->GetLatency() << "\""; + dgFile << "];\n"; + } + } + + for (auto node : nodes) { + MOperator mOp = node->GetInsn()->GetMachineOpcode(); + const AArch64MD *md = &AArch64CG::kMd[mOp]; + dgFile << "insn" << node->GetInsn() << "["; + dgFile << "shape=box,label= \" " << node->GetInsn()->GetId() << ":\n"; + dgFile << "{ "; + dgFile << md->name << "\n"; + dgFile << "}\"];\n"; + } + dgFile << "}\n"; + dgFile.flush(); + dgFile.close(); + std::cout.rdbuf(coutBuf); +} + +/* Do brute force scheduling and dump scheduling information */ +void AArch64Schedule::BruteForceScheduling(const BB &bb) { + LogInfo::MapleLogger() << "\n\n$$ Function: " << cgFunc.GetName(); + LogInfo::MapleLogger() << "\n BB id = " << bb.GetId() << "; nodes.size = " << nodes.size() << "\n"; + + constexpr uint32 maxBruteForceNum = 50; + if (nodes.size() < maxBruteForceNum) { + GenerateDot(bb, nodes); + uint32 maxBruteForceCycle = DoBruteForceSchedule(); + MapleVector bruteNodes = nodes; + uint32 maxSchedCycle = DoSchedule(); + if (maxBruteForceCycle < maxSchedCycle) { + LogInfo::MapleLogger() << "maxBruteForceCycle = " << maxBruteForceCycle << "; maxSchedCycle = "; + LogInfo::MapleLogger() << maxSchedCycle << "\n"; + LogInfo::MapleLogger() << "\n ## Dump dependence graph ## " << "\n"; + DumpDepGraph(nodes); + LogInfo::MapleLogger() << "\n ** Dump bruteForce scheduling result." << "\n"; + DumpScheduleResult(bruteNodes, kBruteForce); + LogInfo::MapleLogger() << "\n ^^ Dump list scheduling result." << "\n"; + DumpScheduleResult(nodes, kListSchedule); + } + } else { + LogInfo::MapleLogger() << "Skip BruteForce scheduling." << "\n"; + DoSchedule(); + } +} + +/* Do simulate scheduling and dump scheduling information */ +void AArch64Schedule::SimulateScheduling(const BB &bb) { + uint32 originCycle = SimulateOnly(); + MapleVector oldNodes = nodes; + uint32 schedCycle = DoSchedule(); + if (originCycle < schedCycle) { + LogInfo::MapleLogger() << "Worse cycle [ " << (schedCycle - originCycle) << " ]; "; + LogInfo::MapleLogger() << "originCycle = " << originCycle << "; schedCycle = "; + LogInfo::MapleLogger() << schedCycle << "; nodes.size = " << nodes.size(); + LogInfo::MapleLogger() << "; $$ Function: " << cgFunc.GetName(); + LogInfo::MapleLogger() << "; BB id = " << bb.GetId() << "\n"; + LogInfo::MapleLogger() << "\n ** Dump original result." << "\n"; + DumpScheduleResult(oldNodes, kSimulateOnly); + LogInfo::MapleLogger() << "\n ^^ Dump list scheduling result." 
<< "\n"; + DumpScheduleResult(nodes, kListSchedule); + } else if (originCycle > schedCycle) { + LogInfo::MapleLogger() << "Advance cycle [ " << (originCycle - schedCycle) << " ]; "; + LogInfo::MapleLogger() << "originCycle = " << originCycle << "; schedCycle = "; + LogInfo::MapleLogger() << schedCycle << "; nodes.size = " << nodes.size(); + LogInfo::MapleLogger() << "; $$ Function: " << cgFunc.GetName(); + LogInfo::MapleLogger() << "; BB id = " << bb.GetId() << "\n"; + } else { + LogInfo::MapleLogger() << "Equal cycle [ 0 ]; originCycle = " << originCycle; + LogInfo::MapleLogger() << " ], ignore. nodes.size = " << nodes.size() << "\n"; + } +} + +/* + * A local list scheduling. + * Schedule insns in basic blocks. + */ +void AArch64Schedule::ListScheduling(bool beforeRA) { + InitIDAndLoc(); + + mad = Globals::GetInstance()->GetMAD(); + if (beforeRA) { + RegPressure::SetMaxRegClassNum(kRegisterLast); + } + depAnalysis = memPool.New(cgFunc, memPool, *mad, beforeRA); + + FOR_ALL_BB(bb, &cgFunc) { + depAnalysis->Run(*bb, nodes); + + if (LIST_SCHED_DUMP_REF) { + GenerateDot(*bb, nodes); + DumpDepGraph(nodes); + } + if (beforeRA) { + RegPressureScheduling(*bb, nodes); + } else { + ClinitPairOpt(); + MemoryAccessPairOpt(); + if (CGOptions::IsDruteForceSched()) { + BruteForceScheduling(*bb); + } else if (CGOptions::IsSimulateSched()) { + SimulateScheduling(*bb); + } else { + DoSchedule(); + } + } + + FinalizeScheduling(*bb, *depAnalysis); + } +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_strldr.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_strldr.cpp new file mode 100644 index 0000000000000000000000000000000000000000..81aa2954edbccc2660837b087284dea55e8f9418 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_strldr.cpp @@ -0,0 +1,326 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "riscv64_strldr.h" +#include "riscv64_reaching.h" +#include "riscv64_cgfunc.h" +#include "common_utils.h" + +namespace maplebe { +using namespace maple; + +static MOperator SelectMovMop(bool isFloatOrSIMD, bool is64Bit) { + return isFloatOrSIMD ? (is64Bit ? MOP_xvmovd : MOP_xvmovs) + : (is64Bit ? MOP_xmovrr : MOP_wmovrr); +} + +void AArch64StoreLoadOpt::Run() { + /* if the number of BB is too large, don't optimize. */ + if (cgFunc.NumBBs() > kMaxBBNum || cgFunc.GetRD()->GetMaxInsnNO() > kMaxInsnNum) { + return; + } + DoStoreLoadOpt(); +} + +/* + * Transfer: store x100, [MEM] + * ... // May exist branches. + * load x200, [MEM] + * ==> + * OPT_VERSION_STR_LIVE: + * store x100, [MEM] + * ... // May exist branches. if x100 not dead here. + * mov x200, x100 + * OPT_VERSION_STR_DIE: + * store x100, [MEM] + * mov x9000(new reg), x100 + * ... // May exist branches. if x100 dead here. + * mov x200, x9000 + * Params: + * strInsn: indicate store insn. + * strSrcIdx: index of source register operand of store insn. 
(x100 in this example) + * memSeq: represent first memOpreand or second memOperand + * memUseInsnSet: insns using memOperand + */ +void AArch64StoreLoadOpt::DoLoadToMoveTransfer(Insn &strInsn, short strSrcIdx, + short memSeq, const InsnSet &memUseInsnSet) { + /* stp instruction need two registers, str only need one register */ + ASSERT(strSrcIdx < kDivide2, "CG internal error."); + /* Find x100's definition insn. */ + InsnSet regDefInsnSet = cgFunc.GetRD()->FindDefForRegOpnd(strInsn, strSrcIdx); + ASSERT(!regDefInsnSet.empty(), "RegOperand is used before defined"); + if (regDefInsnSet.size() != 1) { + return; + } + for (auto *ldrInsn : memUseInsnSet) { + if (!ldrInsn->IsLoad() || (ldrInsn->GetResultNum() > 1) || ldrInsn->GetBB()->IsCleanup()) { + continue; + } + + /* ldr x200, [mem], mem index is 1, x200 index is 0 */ + InsnSet memDefInsnSet = cgFunc.GetRD()->FindDefForMemOpnd(*ldrInsn, kInsnSecondOpnd); + ASSERT(!memDefInsnSet.empty(), "load insn should have definitions."); + /* If load has multiple definition, continue. */ + if (memDefInsnSet.size() > 1) { + continue; + } + + Operand &resOpnd = ldrInsn->GetOperand(kInsnFirstOpnd); + Operand &srcOpnd = strInsn.GetOperand(strSrcIdx); + ASSERT(resOpnd.GetSize() == srcOpnd.GetSize(), "For stack location, the size of src and dst should be same."); + + auto &resRegOpnd = static_cast(resOpnd); + auto &srcRegOpnd = static_cast(srcOpnd); + if (resRegOpnd.GetRegisterType() != srcRegOpnd.GetRegisterType()) { + continue; + } + + /* Check if use operand of store is live at load insn. */ + if (cgFunc.GetRD()->RegIsLiveBetweenInsn(srcRegOpnd.GetRegisterNumber(), strInsn, *ldrInsn)) { + GenerateMoveLiveInsn(resRegOpnd, srcRegOpnd, *ldrInsn, strInsn, memSeq); + } else { + GenerateMoveDeadInsn(resRegOpnd, srcRegOpnd, *ldrInsn, strInsn, memSeq); + } + + if (CG_DEBUG_FUNC(&cgFunc)) { + LogInfo::MapleLogger() << "Do store-load optimization 1: str version"; + LogInfo::MapleLogger() << cgFunc.GetName() << '\n'; + LogInfo::MapleLogger() << "Store insn: "; + strInsn.Dump(); + LogInfo::MapleLogger() << "Load insn: "; + ldrInsn->Dump(); + } + } +} + +void AArch64StoreLoadOpt::GenerateMoveLiveInsn(RegOperand &resRegOpnd, RegOperand &srcRegOpnd, + Insn &ldrInsn, Insn &strInsn, short memSeq) { + MOperator movMop = SelectMovMop(resRegOpnd.IsOfFloatOrSIMDClass(), resRegOpnd.GetSize() == k64BitSize); + Insn *movInsn = nullptr; + if (str2MovMap[&strInsn][memSeq] != nullptr) { + Insn *movInsnOfStr = str2MovMap[&strInsn][memSeq]; + auto &vregOpnd = static_cast(movInsnOfStr->GetOperand(kInsnFirstOpnd)); + movInsn = &cgFunc.GetCG()->BuildInstruction(movMop, resRegOpnd, vregOpnd); + } else { + movInsn = &cgFunc.GetCG()->BuildInstruction(movMop, resRegOpnd, srcRegOpnd); + } + movInsn->SetId(ldrInsn.GetId()); + ldrInsn.GetBB()->ReplaceInsn(ldrInsn, *movInsn); + /* Add comment. */ + MapleString newComment = ldrInsn.GetComment(); + if (strInsn.IsStorePair()) { + newComment += "; stp-load live version."; + } else { + newComment += "; str-load live version."; + } + movInsn->SetComment(newComment); + cgFunc.GetRD()->InitGenUse(*ldrInsn.GetBB(), false); +} + +void AArch64StoreLoadOpt::GenerateMoveDeadInsn(RegOperand &resRegOpnd, RegOperand &srcRegOpnd, + Insn &ldrInsn, Insn &strInsn, short memSeq) { + Insn *newMovInsn = nullptr; + RegOperand *vregOpnd = nullptr; + + if (str2MovMap[&strInsn][memSeq] == nullptr) { + RegType regTy = srcRegOpnd.IsOfFloatOrSIMDClass() ? kRegTyFloat : kRegTyInt; + regno_t vRegNO = + cgFunc.NewVReg(regTy, srcRegOpnd.GetSize() <= k32BitSize ? 
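+ // sources of 32 bits or less get a k4ByteSize vreg, larger ones k8ByteSize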
k4ByteSize : k8ByteSize); + /* generate a new vreg, check if the size of DataInfo is big enough */ + if (vRegNO >= cgFunc.GetRD()->GetRegSize(*strInsn.GetBB())) { + cgFunc.GetRD()->EnlargeRegCapacity(vRegNO); + } + vregOpnd = &cgFunc.CreateVirtualRegisterOperand(vRegNO); + MOperator newMop = SelectMovMop(resRegOpnd.IsOfFloatOrSIMDClass(), resRegOpnd.GetSize() == k64BitSize); + newMovInsn = &cgFunc.GetCG()->BuildInstruction(newMop, *vregOpnd, srcRegOpnd); + newMovInsn->SetId(strInsn.GetId() + memSeq + 1); + strInsn.GetBB()->InsertInsnAfter(strInsn, *newMovInsn); + str2MovMap[&strInsn][memSeq] = newMovInsn; + /* update DataInfo */ + cgFunc.GetRD()->UpdateInOut(*strInsn.GetBB(), true); + } else { + newMovInsn = str2MovMap[&strInsn][memSeq]; + vregOpnd = &static_cast(newMovInsn->GetOperand(kInsnFirstOpnd)); + } + MOperator movMop = SelectMovMop(resRegOpnd.IsOfFloatOrSIMDClass(), resRegOpnd.GetSize() == k64BitSize); + Insn &movInsn = cgFunc.GetCG()->BuildInstruction(movMop, resRegOpnd, *vregOpnd); + movInsn.SetId(ldrInsn.GetId()); + ldrInsn.GetBB()->ReplaceInsn(ldrInsn, movInsn); + + /* Add comment. */ + MapleString newComment = ldrInsn.GetComment(); + if (strInsn.IsStorePair()) { + newComment += "; stp-load die version."; + } else { + newComment += "; str-load die version."; + } + movInsn.SetComment(newComment); + cgFunc.GetRD()->InitGenUse(*ldrInsn.GetBB(), false); +} + +/* + * Transfer: store wzr, [MEM] + * ... // May exist branches. + * load x200, [MEM] + * ==> + * OPT_VERSION_STP_ZERO / OPT_VERSION_STR_ZERO: + * store wzr, [MEM] + * ... // May exist branches. if x100 not dead here. + * mov x200, wzr + * + * Params: + * stInsn: indicate store insn. + * strSrcIdx: index of source register operand of store insn. (wzr in this example) + * memUseInsnSet: insns using memOperand + */ +void AArch64StoreLoadOpt::DoLoadZeroToMoveTransfer(const Insn &strInsn, short strSrcIdx, + const InsnSet &memUseInsnSet) const { + /* comment for strInsn should be only added once */ + for (auto *ldrInsn : memUseInsnSet) { + /* Currently we don't support useInsn is ldp insn. */ + if (!ldrInsn->IsLoad() || ldrInsn->GetResultNum() > 1) { + continue; + } + /* ldr reg, [mem], the index of [mem] is 1 */ + InsnSet defInsnForUseInsns = cgFunc.GetRD()->FindDefForMemOpnd(*ldrInsn, 1); + /* If load has multiple definition, continue. */ + if (defInsnForUseInsns.size() > 1) { + continue; + } + + auto &resOpnd = ldrInsn->GetOperand(0); + auto &srcOpnd = strInsn.GetOperand(strSrcIdx); + + ASSERT(resOpnd.GetSize() == srcOpnd.GetSize(), "For stack location, the size of src and dst should be same."); + RegOperand &resRegOpnd = static_cast(resOpnd); + MOperator movMop = SelectMovMop(resRegOpnd.IsOfFloatOrSIMDClass(), resRegOpnd.GetSize() == k64BitSize); + Insn &movInsn = cgFunc.GetCG()->BuildInstruction(movMop, resOpnd, srcOpnd); + movInsn.SetId(ldrInsn->GetId()); + ldrInsn->GetBB()->ReplaceInsn(*ldrInsn, movInsn); + + /* Add comment. */ + MapleString newComment = ldrInsn->GetComment(); + newComment += ", str-load zero version"; + movInsn.SetComment(newComment); + } +} + +bool AArch64StoreLoadOpt::CheckStoreOpCode(MOperator opCode) const { + switch (opCode) { + case MOP_wstr: + case MOP_xstr: + case MOP_sstr: + case MOP_dstr: + case MOP_wstp: + case MOP_xstp: + case MOP_sstp: + case MOP_dstp: + case MOP_wstrb: + case MOP_wstrh: + return true; + default: + return false; + } +} + +/* + * Optimize: store x100, [MEM] + * ... // May exist branches. 
+ * load x200, [MEM] + * ==> + * OPT_VERSION_STP_LIVE / OPT_VERSION_STR_LIVE: + * store x100, [MEM] + * ... // May exist branches. if x100 not dead here. + * mov x200, x100 + * OPT_VERSION_STP_DIE / OPT_VERSION_STR_DIE: + * store x100, [MEM] + * mov x9000(new reg), x100 + * ... // May exist branches. if x100 dead here. + * mov x200, x9000 + * + * Note: x100 may be wzr/xzr registers. + */ +void AArch64StoreLoadOpt::DoStoreLoadOpt() { + FOR_ALL_BB(bb, &cgFunc) { + FOR_BB_INSNS(insn, bb) { + if (!insn->IsMachineInstruction() || !insn->IsStore() || !CheckStoreOpCode(insn->GetMachineOpcode())) { + continue; + } + if (insn->IsStorePair()) { + ProcessStrPair(*insn); + continue; + } + ProcessStr(*insn); + } + } +} + +void AArch64StoreLoadOpt::ProcessStrPair(Insn &insn) { + const short memIndex = 2; + short regIndex = 0; + Operand &opnd = insn.GetOperand(memIndex); + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + if ((base == nullptr) || !(cgFunc.GetRD()->IsFrameReg(*base))) { + return; + } + ASSERT(memOpnd.GetIndexRegister() == nullptr, "frame MemOperand must not be exist register index"); + InsnSet memUseInsnSet; + for (int i = 0; i != kMaxMovNum; ++i) { + memUseInsnSet.clear(); + if (i == 0) { + regIndex = 0; + memUseInsnSet = cgFunc.GetRD()->FindUseForMemOpnd(insn, memIndex); + } else { + regIndex = 1; + memUseInsnSet = cgFunc.GetRD()->FindUseForMemOpnd(insn, memIndex, true); + } + if (memUseInsnSet.empty()) { + return; + } + auto ®Opnd = static_cast(insn.GetOperand(regIndex)); + if (regOpnd.IsZeroRegister()) { + DoLoadZeroToMoveTransfer(insn, regIndex, memUseInsnSet); + } else { + DoLoadToMoveTransfer(insn, regIndex, i, memUseInsnSet); + } + } +} + +void AArch64StoreLoadOpt::ProcessStr(Insn &insn) { + /* str x100, [mem], mem index is 1, x100 index is 0; */ + const short memIndex = 1; + const short regIndex = 0; + Operand &opnd = insn.GetOperand(memIndex); + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + if ((base == nullptr) || !(cgFunc.GetRD()->IsFrameReg(*base))) { + return; + } + ASSERT(memOpnd.GetIndexRegister() == nullptr, "frame MemOperand must not be exist register index"); + + InsnSet memUseInsnSet = cgFunc.GetRD()->FindUseForMemOpnd(insn, memIndex); + if (memUseInsnSet.empty()) { + return; + } + + auto *regOpnd = static_cast(insn.GetOpnd(regIndex)); + CHECK_NULL_FATAL(regOpnd); + if (regOpnd->IsZeroRegister()) { + DoLoadZeroToMoveTransfer(insn, regIndex, memUseInsnSet); + } else { + DoLoadToMoveTransfer(insn, regIndex, 0, memUseInsnSet); + } +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/riscv64_yieldpoint.cpp b/src/mapleall/maple_be/src/cg/riscv64/riscv64_yieldpoint.cpp new file mode 100644 index 0000000000000000000000000000000000000000..39e70f5f369c2a71febc1b7b1d78d5447d725ff8 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/riscv64_yieldpoint.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#include "riscv64_yieldpoint.h" +#include "riscv64_cgfunc.h" + +namespace maplebe { +using namespace maple; + +void AArch64YieldPointInsertion::Run() { + InsertYieldPoint(); +} + +void AArch64YieldPointInsertion::InsertYieldPoint() { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + std::string refQueueName = "Ljava_2Flang_2Fref_2FReference_3B_7C_3Cinit_3E_7C_" + "28Ljava_2Flang_2FObject_3BLjava_2Flang_2Fref_2FReferenceQueue_3B_29V"; + if (!CGOptions::IsGCOnly() && (aarchCGFunc->GetName() == refQueueName)) { + /* skip insert yieldpoint in reference constructor, avoid rc verify issue */ + ASSERT(aarchCGFunc->GetYieldPointInsn() != nullptr, "the entry yield point has been inserted"); + aarchCGFunc->GetYieldPointInsn()->GetBB()->RemoveInsn(*aarchCGFunc->GetYieldPointInsn()); + return; + } + + /* + * do not insert yieldpoint in function that not saved X30 into stack, + * because X30 will be changed after yieldpoint is taken. + */ + if (!aarchCGFunc->GetHasProEpilogue()) { + ASSERT (aarchCGFunc->GetYieldPointInsn() != nullptr, "the entry yield point has been inserted"); + aarchCGFunc->GetYieldPointInsn()->GetBB()->RemoveInsn(*aarchCGFunc->GetYieldPointInsn()); + return; + } + /* skip if no GetFirstbb(). */ + if (aarchCGFunc->GetFirstBB() == nullptr) { + return; + } + /* + * The yield point in the entry of the GetFunction() is inserted just after the initialization + * of localrefvars in HandleRCCall. + * for BBs after firstbb. + */ + for (BB *bb = aarchCGFunc->GetFirstBB()->GetNext(); bb != nullptr; bb = bb->GetNext()) { + /* insert a yieldpoint at beginning if BB is BackEdgeDest. */ + if (bb->IsBackEdgeDest()) { + aarchCGFunc->GetDummyBB()->ClearInsns(); + aarchCGFunc->GenerateYieldpoint(*aarchCGFunc->GetDummyBB()); + bb->InsertAtBeginning(*aarchCGFunc->GetDummyBB()); + } + } +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/riscv64/valid_bitmask_imm.txt b/src/mapleall/maple_be/src/cg/riscv64/valid_bitmask_imm.txt new file mode 100755 index 0000000000000000000000000000000000000000..53a6135b6ebd4f570d728df66a9b2584a3a677ef --- /dev/null +++ b/src/mapleall/maple_be/src/cg/riscv64/valid_bitmask_imm.txt @@ -0,0 +1,5372 @@ +0x5555555555555555, +0xaaaaaaaaaaaaaaaa, +0x1111111111111111, +0x2222222222222222, +0x4444444444444444, +0x8888888888888888, +0x3333333333333333, +0x6666666666666666, +0xcccccccccccccccc, +0x9999999999999999, +0x7777777777777777, +0xeeeeeeeeeeeeeeee, +0xdddddddddddddddd, +0xbbbbbbbbbbbbbbbb, +0x0101010101010101, +0x0202020202020202, +0x0404040404040404, +0x0808080808080808, +0x1010101010101010, +0x2020202020202020, +0x4040404040404040, +0x8080808080808080, +0x0303030303030303, +0x0606060606060606, +0x0c0c0c0c0c0c0c0c, +0x1818181818181818, +0x3030303030303030, +0x6060606060606060, +0xc0c0c0c0c0c0c0c0, +0x8181818181818181, +0x0707070707070707, +0x0e0e0e0e0e0e0e0e, +0x1c1c1c1c1c1c1c1c, +0x3838383838383838, +0x7070707070707070, +0xe0e0e0e0e0e0e0e0, +0xc1c1c1c1c1c1c1c1, +0x8383838383838383, +0x0f0f0f0f0f0f0f0f, +0x1e1e1e1e1e1e1e1e, +0x3c3c3c3c3c3c3c3c, +0x7878787878787878, +0xf0f0f0f0f0f0f0f0, +0xe1e1e1e1e1e1e1e1, +0xc3c3c3c3c3c3c3c3, +0x8787878787878787, +0x1f1f1f1f1f1f1f1f, +0x3e3e3e3e3e3e3e3e, +0x7c7c7c7c7c7c7c7c, +0xf8f8f8f8f8f8f8f8, +0xf1f1f1f1f1f1f1f1, +0xe3e3e3e3e3e3e3e3, +0xc7c7c7c7c7c7c7c7, +0x8f8f8f8f8f8f8f8f, +0x3f3f3f3f3f3f3f3f, +0x7e7e7e7e7e7e7e7e, +0xfcfcfcfcfcfcfcfc, +0xf9f9f9f9f9f9f9f9, +0xf3f3f3f3f3f3f3f3, +0xe7e7e7e7e7e7e7e7, +0xcfcfcfcfcfcfcfcf, +0x9f9f9f9f9f9f9f9f, +0x7f7f7f7f7f7f7f7f, +0xfefefefefefefefe, +0xfdfdfdfdfdfdfdfd, 
+0xfbfbfbfbfbfbfbfb, +0xf7f7f7f7f7f7f7f7, +0xefefefefefefefef, +0xdfdfdfdfdfdfdfdf, +0xbfbfbfbfbfbfbfbf, +0x0001000100010001, +0x0002000200020002, +0x0004000400040004, +0x0008000800080008, +0x0010001000100010, +0x0020002000200020, +0x0040004000400040, +0x0080008000800080, +0x0100010001000100, +0x0200020002000200, +0x0400040004000400, +0x0800080008000800, +0x1000100010001000, +0x2000200020002000, +0x4000400040004000, +0x8000800080008000, +0x0003000300030003, +0x0006000600060006, +0x000c000c000c000c, +0x0018001800180018, +0x0030003000300030, +0x0060006000600060, +0x00c000c000c000c0, +0x0180018001800180, +0x0300030003000300, +0x0600060006000600, +0x0c000c000c000c00, +0x1800180018001800, +0x3000300030003000, +0x6000600060006000, +0xc000c000c000c000, +0x8001800180018001, +0x0007000700070007, +0x000e000e000e000e, +0x001c001c001c001c, +0x0038003800380038, +0x0070007000700070, +0x00e000e000e000e0, +0x01c001c001c001c0, +0x0380038003800380, +0x0700070007000700, +0x0e000e000e000e00, +0x1c001c001c001c00, +0x3800380038003800, +0x7000700070007000, +0xe000e000e000e000, +0xc001c001c001c001, +0x8003800380038003, +0x000f000f000f000f, +0x001e001e001e001e, +0x003c003c003c003c, +0x0078007800780078, +0x00f000f000f000f0, +0x01e001e001e001e0, +0x03c003c003c003c0, +0x0780078007800780, +0x0f000f000f000f00, +0x1e001e001e001e00, +0x3c003c003c003c00, +0x7800780078007800, +0xf000f000f000f000, +0xe001e001e001e001, +0xc003c003c003c003, +0x8007800780078007, +0x001f001f001f001f, +0x003e003e003e003e, +0x007c007c007c007c, +0x00f800f800f800f8, +0x01f001f001f001f0, +0x03e003e003e003e0, +0x07c007c007c007c0, +0x0f800f800f800f80, +0x1f001f001f001f00, +0x3e003e003e003e00, +0x7c007c007c007c00, +0xf800f800f800f800, +0xf001f001f001f001, +0xe003e003e003e003, +0xc007c007c007c007, +0x800f800f800f800f, +0x003f003f003f003f, +0x007e007e007e007e, +0x00fc00fc00fc00fc, +0x01f801f801f801f8, +0x03f003f003f003f0, +0x07e007e007e007e0, +0x0fc00fc00fc00fc0, +0x1f801f801f801f80, +0x3f003f003f003f00, +0x7e007e007e007e00, +0xfc00fc00fc00fc00, +0xf801f801f801f801, +0xf003f003f003f003, +0xe007e007e007e007, +0xc00fc00fc00fc00f, +0x801f801f801f801f, +0x007f007f007f007f, +0x00fe00fe00fe00fe, +0x01fc01fc01fc01fc, +0x03f803f803f803f8, +0x07f007f007f007f0, +0x0fe00fe00fe00fe0, +0x1fc01fc01fc01fc0, +0x3f803f803f803f80, +0x7f007f007f007f00, +0xfe00fe00fe00fe00, +0xfc01fc01fc01fc01, +0xf803f803f803f803, +0xf007f007f007f007, +0xe00fe00fe00fe00f, +0xc01fc01fc01fc01f, +0x803f803f803f803f, +0x00ff00ff00ff00ff, +0x01fe01fe01fe01fe, +0x03fc03fc03fc03fc, +0x07f807f807f807f8, +0x0ff00ff00ff00ff0, +0x1fe01fe01fe01fe0, +0x3fc03fc03fc03fc0, +0x7f807f807f807f80, +0xff00ff00ff00ff00, +0xfe01fe01fe01fe01, +0xfc03fc03fc03fc03, +0xf807f807f807f807, +0xf00ff00ff00ff00f, +0xe01fe01fe01fe01f, +0xc03fc03fc03fc03f, +0x807f807f807f807f, +0x01ff01ff01ff01ff, +0x03fe03fe03fe03fe, +0x07fc07fc07fc07fc, +0x0ff80ff80ff80ff8, +0x1ff01ff01ff01ff0, +0x3fe03fe03fe03fe0, +0x7fc07fc07fc07fc0, +0xff80ff80ff80ff80, +0xff01ff01ff01ff01, +0xfe03fe03fe03fe03, +0xfc07fc07fc07fc07, +0xf80ff80ff80ff80f, +0xf01ff01ff01ff01f, +0xe03fe03fe03fe03f, +0xc07fc07fc07fc07f, +0x80ff80ff80ff80ff, +0x03ff03ff03ff03ff, +0x07fe07fe07fe07fe, +0x0ffc0ffc0ffc0ffc, +0x1ff81ff81ff81ff8, +0x3ff03ff03ff03ff0, +0x7fe07fe07fe07fe0, +0xffc0ffc0ffc0ffc0, +0xff81ff81ff81ff81, +0xff03ff03ff03ff03, +0xfe07fe07fe07fe07, +0xfc0ffc0ffc0ffc0f, +0xf81ff81ff81ff81f, +0xf03ff03ff03ff03f, +0xe07fe07fe07fe07f, +0xc0ffc0ffc0ffc0ff, +0x81ff81ff81ff81ff, +0x07ff07ff07ff07ff, +0x0ffe0ffe0ffe0ffe, +0x1ffc1ffc1ffc1ffc, +0x3ff83ff83ff83ff8, 
+0x7ff07ff07ff07ff0, +0xffe0ffe0ffe0ffe0, +0xffc1ffc1ffc1ffc1, +0xff83ff83ff83ff83, +0xff07ff07ff07ff07, +0xfe0ffe0ffe0ffe0f, +0xfc1ffc1ffc1ffc1f, +0xf83ff83ff83ff83f, +0xf07ff07ff07ff07f, +0xe0ffe0ffe0ffe0ff, +0xc1ffc1ffc1ffc1ff, +0x83ff83ff83ff83ff, +0x0fff0fff0fff0fff, +0x1ffe1ffe1ffe1ffe, +0x3ffc3ffc3ffc3ffc, +0x7ff87ff87ff87ff8, +0xfff0fff0fff0fff0, +0xffe1ffe1ffe1ffe1, +0xffc3ffc3ffc3ffc3, +0xff87ff87ff87ff87, +0xff0fff0fff0fff0f, +0xfe1ffe1ffe1ffe1f, +0xfc3ffc3ffc3ffc3f, +0xf87ff87ff87ff87f, +0xf0fff0fff0fff0ff, +0xe1ffe1ffe1ffe1ff, +0xc3ffc3ffc3ffc3ff, +0x87ff87ff87ff87ff, +0x1fff1fff1fff1fff, +0x3ffe3ffe3ffe3ffe, +0x7ffc7ffc7ffc7ffc, +0xfff8fff8fff8fff8, +0xfff1fff1fff1fff1, +0xffe3ffe3ffe3ffe3, +0xffc7ffc7ffc7ffc7, +0xff8fff8fff8fff8f, +0xff1fff1fff1fff1f, +0xfe3ffe3ffe3ffe3f, +0xfc7ffc7ffc7ffc7f, +0xf8fff8fff8fff8ff, +0xf1fff1fff1fff1ff, +0xe3ffe3ffe3ffe3ff, +0xc7ffc7ffc7ffc7ff, +0x8fff8fff8fff8fff, +0x3fff3fff3fff3fff, +0x7ffe7ffe7ffe7ffe, +0xfffcfffcfffcfffc, +0xfff9fff9fff9fff9, +0xfff3fff3fff3fff3, +0xffe7ffe7ffe7ffe7, +0xffcfffcfffcfffcf, +0xff9fff9fff9fff9f, +0xff3fff3fff3fff3f, +0xfe7ffe7ffe7ffe7f, +0xfcfffcfffcfffcff, +0xf9fff9fff9fff9ff, +0xf3fff3fff3fff3ff, +0xe7ffe7ffe7ffe7ff, +0xcfffcfffcfffcfff, +0x9fff9fff9fff9fff, +0x7fff7fff7fff7fff, +0xfffefffefffefffe, +0xfffdfffdfffdfffd, +0xfffbfffbfffbfffb, +0xfff7fff7fff7fff7, +0xffefffefffefffef, +0xffdfffdfffdfffdf, +0xffbfffbfffbfffbf, +0xff7fff7fff7fff7f, +0xfefffefffefffeff, +0xfdfffdfffdfffdff, +0xfbfffbfffbfffbff, +0xf7fff7fff7fff7ff, +0xefffefffefffefff, +0xdfffdfffdfffdfff, +0xbfffbfffbfffbfff, +0x0000000100000001, +0x0000000200000002, +0x0000000400000004, +0x0000000800000008, +0x0000001000000010, +0x0000002000000020, +0x0000004000000040, +0x0000008000000080, +0x0000010000000100, +0x0000020000000200, +0x0000040000000400, +0x0000080000000800, +0x0000100000001000, +0x0000200000002000, +0x0000400000004000, +0x0000800000008000, +0x0001000000010000, +0x0002000000020000, +0x0004000000040000, +0x0008000000080000, +0x0010000000100000, +0x0020000000200000, +0x0040000000400000, +0x0080000000800000, +0x0100000001000000, +0x0200000002000000, +0x0400000004000000, +0x0800000008000000, +0x1000000010000000, +0x2000000020000000, +0x4000000040000000, +0x8000000080000000, +0x0000000300000003, +0x0000000600000006, +0x0000000c0000000c, +0x0000001800000018, +0x0000003000000030, +0x0000006000000060, +0x000000c0000000c0, +0x0000018000000180, +0x0000030000000300, +0x0000060000000600, +0x00000c0000000c00, +0x0000180000001800, +0x0000300000003000, +0x0000600000006000, +0x0000c0000000c000, +0x0001800000018000, +0x0003000000030000, +0x0006000000060000, +0x000c0000000c0000, +0x0018000000180000, +0x0030000000300000, +0x0060000000600000, +0x00c0000000c00000, +0x0180000001800000, +0x0300000003000000, +0x0600000006000000, +0x0c0000000c000000, +0x1800000018000000, +0x3000000030000000, +0x6000000060000000, +0xc0000000c0000000, +0x8000000180000001, +0x0000000700000007, +0x0000000e0000000e, +0x0000001c0000001c, +0x0000003800000038, +0x0000007000000070, +0x000000e0000000e0, +0x000001c0000001c0, +0x0000038000000380, +0x0000070000000700, +0x00000e0000000e00, +0x00001c0000001c00, +0x0000380000003800, +0x0000700000007000, +0x0000e0000000e000, +0x0001c0000001c000, +0x0003800000038000, +0x0007000000070000, +0x000e0000000e0000, +0x001c0000001c0000, +0x0038000000380000, +0x0070000000700000, +0x00e0000000e00000, +0x01c0000001c00000, +0x0380000003800000, +0x0700000007000000, +0x0e0000000e000000, +0x1c0000001c000000, +0x3800000038000000, +0x7000000070000000, 
+0xe0000000e0000000, +0xc0000001c0000001, +0x8000000380000003, +0x0000000f0000000f, +0x0000001e0000001e, +0x0000003c0000003c, +0x0000007800000078, +0x000000f0000000f0, +0x000001e0000001e0, +0x000003c0000003c0, +0x0000078000000780, +0x00000f0000000f00, +0x00001e0000001e00, +0x00003c0000003c00, +0x0000780000007800, +0x0000f0000000f000, +0x0001e0000001e000, +0x0003c0000003c000, +0x0007800000078000, +0x000f0000000f0000, +0x001e0000001e0000, +0x003c0000003c0000, +0x0078000000780000, +0x00f0000000f00000, +0x01e0000001e00000, +0x03c0000003c00000, +0x0780000007800000, +0x0f0000000f000000, +0x1e0000001e000000, +0x3c0000003c000000, +0x7800000078000000, +0xf0000000f0000000, +0xe0000001e0000001, +0xc0000003c0000003, +0x8000000780000007, +0x0000001f0000001f, +0x0000003e0000003e, +0x0000007c0000007c, +0x000000f8000000f8, +0x000001f0000001f0, +0x000003e0000003e0, +0x000007c0000007c0, +0x00000f8000000f80, +0x00001f0000001f00, +0x00003e0000003e00, +0x00007c0000007c00, +0x0000f8000000f800, +0x0001f0000001f000, +0x0003e0000003e000, +0x0007c0000007c000, +0x000f8000000f8000, +0x001f0000001f0000, +0x003e0000003e0000, +0x007c0000007c0000, +0x00f8000000f80000, +0x01f0000001f00000, +0x03e0000003e00000, +0x07c0000007c00000, +0x0f8000000f800000, +0x1f0000001f000000, +0x3e0000003e000000, +0x7c0000007c000000, +0xf8000000f8000000, +0xf0000001f0000001, +0xe0000003e0000003, +0xc0000007c0000007, +0x8000000f8000000f, +0x0000003f0000003f, +0x0000007e0000007e, +0x000000fc000000fc, +0x000001f8000001f8, +0x000003f0000003f0, +0x000007e0000007e0, +0x00000fc000000fc0, +0x00001f8000001f80, +0x00003f0000003f00, +0x00007e0000007e00, +0x0000fc000000fc00, +0x0001f8000001f800, +0x0003f0000003f000, +0x0007e0000007e000, +0x000fc000000fc000, +0x001f8000001f8000, +0x003f0000003f0000, +0x007e0000007e0000, +0x00fc000000fc0000, +0x01f8000001f80000, +0x03f0000003f00000, +0x07e0000007e00000, +0x0fc000000fc00000, +0x1f8000001f800000, +0x3f0000003f000000, +0x7e0000007e000000, +0xfc000000fc000000, +0xf8000001f8000001, +0xf0000003f0000003, +0xe0000007e0000007, +0xc000000fc000000f, +0x8000001f8000001f, +0x0000007f0000007f, +0x000000fe000000fe, +0x000001fc000001fc, +0x000003f8000003f8, +0x000007f0000007f0, +0x00000fe000000fe0, +0x00001fc000001fc0, +0x00003f8000003f80, +0x00007f0000007f00, +0x0000fe000000fe00, +0x0001fc000001fc00, +0x0003f8000003f800, +0x0007f0000007f000, +0x000fe000000fe000, +0x001fc000001fc000, +0x003f8000003f8000, +0x007f0000007f0000, +0x00fe000000fe0000, +0x01fc000001fc0000, +0x03f8000003f80000, +0x07f0000007f00000, +0x0fe000000fe00000, +0x1fc000001fc00000, +0x3f8000003f800000, +0x7f0000007f000000, +0xfe000000fe000000, +0xfc000001fc000001, +0xf8000003f8000003, +0xf0000007f0000007, +0xe000000fe000000f, +0xc000001fc000001f, +0x8000003f8000003f, +0x000000ff000000ff, +0x000001fe000001fe, +0x000003fc000003fc, +0x000007f8000007f8, +0x00000ff000000ff0, +0x00001fe000001fe0, +0x00003fc000003fc0, +0x00007f8000007f80, +0x0000ff000000ff00, +0x0001fe000001fe00, +0x0003fc000003fc00, +0x0007f8000007f800, +0x000ff000000ff000, +0x001fe000001fe000, +0x003fc000003fc000, +0x007f8000007f8000, +0x00ff000000ff0000, +0x01fe000001fe0000, +0x03fc000003fc0000, +0x07f8000007f80000, +0x0ff000000ff00000, +0x1fe000001fe00000, +0x3fc000003fc00000, +0x7f8000007f800000, +0xff000000ff000000, +0xfe000001fe000001, +0xfc000003fc000003, +0xf8000007f8000007, +0xf000000ff000000f, +0xe000001fe000001f, +0xc000003fc000003f, +0x8000007f8000007f, +0x000001ff000001ff, +0x000003fe000003fe, +0x000007fc000007fc, +0x00000ff800000ff8, +0x00001ff000001ff0, +0x00003fe000003fe0, 
+0x00007fc000007fc0, +0x0000ff800000ff80, +0x0001ff000001ff00, +0x0003fe000003fe00, +0x0007fc000007fc00, +0x000ff800000ff800, +0x001ff000001ff000, +0x003fe000003fe000, +0x007fc000007fc000, +0x00ff800000ff8000, +0x01ff000001ff0000, +0x03fe000003fe0000, +0x07fc000007fc0000, +0x0ff800000ff80000, +0x1ff000001ff00000, +0x3fe000003fe00000, +0x7fc000007fc00000, +0xff800000ff800000, +0xff000001ff000001, +0xfe000003fe000003, +0xfc000007fc000007, +0xf800000ff800000f, +0xf000001ff000001f, +0xe000003fe000003f, +0xc000007fc000007f, +0x800000ff800000ff, +0x000003ff000003ff, +0x000007fe000007fe, +0x00000ffc00000ffc, +0x00001ff800001ff8, +0x00003ff000003ff0, +0x00007fe000007fe0, +0x0000ffc00000ffc0, +0x0001ff800001ff80, +0x0003ff000003ff00, +0x0007fe000007fe00, +0x000ffc00000ffc00, +0x001ff800001ff800, +0x003ff000003ff000, +0x007fe000007fe000, +0x00ffc00000ffc000, +0x01ff800001ff8000, +0x03ff000003ff0000, +0x07fe000007fe0000, +0x0ffc00000ffc0000, +0x1ff800001ff80000, +0x3ff000003ff00000, +0x7fe000007fe00000, +0xffc00000ffc00000, +0xff800001ff800001, +0xff000003ff000003, +0xfe000007fe000007, +0xfc00000ffc00000f, +0xf800001ff800001f, +0xf000003ff000003f, +0xe000007fe000007f, +0xc00000ffc00000ff, +0x800001ff800001ff, +0x000007ff000007ff, +0x00000ffe00000ffe, +0x00001ffc00001ffc, +0x00003ff800003ff8, +0x00007ff000007ff0, +0x0000ffe00000ffe0, +0x0001ffc00001ffc0, +0x0003ff800003ff80, +0x0007ff000007ff00, +0x000ffe00000ffe00, +0x001ffc00001ffc00, +0x003ff800003ff800, +0x007ff000007ff000, +0x00ffe00000ffe000, +0x01ffc00001ffc000, +0x03ff800003ff8000, +0x07ff000007ff0000, +0x0ffe00000ffe0000, +0x1ffc00001ffc0000, +0x3ff800003ff80000, +0x7ff000007ff00000, +0xffe00000ffe00000, +0xffc00001ffc00001, +0xff800003ff800003, +0xff000007ff000007, +0xfe00000ffe00000f, +0xfc00001ffc00001f, +0xf800003ff800003f, +0xf000007ff000007f, +0xe00000ffe00000ff, +0xc00001ffc00001ff, +0x800003ff800003ff, +0x00000fff00000fff, +0x00001ffe00001ffe, +0x00003ffc00003ffc, +0x00007ff800007ff8, +0x0000fff00000fff0, +0x0001ffe00001ffe0, +0x0003ffc00003ffc0, +0x0007ff800007ff80, +0x000fff00000fff00, +0x001ffe00001ffe00, +0x003ffc00003ffc00, +0x007ff800007ff800, +0x00fff00000fff000, +0x01ffe00001ffe000, +0x03ffc00003ffc000, +0x07ff800007ff8000, +0x0fff00000fff0000, +0x1ffe00001ffe0000, +0x3ffc00003ffc0000, +0x7ff800007ff80000, +0xfff00000fff00000, +0xffe00001ffe00001, +0xffc00003ffc00003, +0xff800007ff800007, +0xff00000fff00000f, +0xfe00001ffe00001f, +0xfc00003ffc00003f, +0xf800007ff800007f, +0xf00000fff00000ff, +0xe00001ffe00001ff, +0xc00003ffc00003ff, +0x800007ff800007ff, +0x00001fff00001fff, +0x00003ffe00003ffe, +0x00007ffc00007ffc, +0x0000fff80000fff8, +0x0001fff00001fff0, +0x0003ffe00003ffe0, +0x0007ffc00007ffc0, +0x000fff80000fff80, +0x001fff00001fff00, +0x003ffe00003ffe00, +0x007ffc00007ffc00, +0x00fff80000fff800, +0x01fff00001fff000, +0x03ffe00003ffe000, +0x07ffc00007ffc000, +0x0fff80000fff8000, +0x1fff00001fff0000, +0x3ffe00003ffe0000, +0x7ffc00007ffc0000, +0xfff80000fff80000, +0xfff00001fff00001, +0xffe00003ffe00003, +0xffc00007ffc00007, +0xff80000fff80000f, +0xff00001fff00001f, +0xfe00003ffe00003f, +0xfc00007ffc00007f, +0xf80000fff80000ff, +0xf00001fff00001ff, +0xe00003ffe00003ff, +0xc00007ffc00007ff, +0x80000fff80000fff, +0x00003fff00003fff, +0x00007ffe00007ffe, +0x0000fffc0000fffc, +0x0001fff80001fff8, +0x0003fff00003fff0, +0x0007ffe00007ffe0, +0x000fffc0000fffc0, +0x001fff80001fff80, +0x003fff00003fff00, +0x007ffe00007ffe00, +0x00fffc0000fffc00, +0x01fff80001fff800, +0x03fff00003fff000, +0x07ffe00007ffe000, +0x0fffc0000fffc000, 
+0x1fff80001fff8000, +0x3fff00003fff0000, +0x7ffe00007ffe0000, +0xfffc0000fffc0000, +0xfff80001fff80001, +0xfff00003fff00003, +0xffe00007ffe00007, +0xffc0000fffc0000f, +0xff80001fff80001f, +0xff00003fff00003f, +0xfe00007ffe00007f, +0xfc0000fffc0000ff, +0xf80001fff80001ff, +0xf00003fff00003ff, +0xe00007ffe00007ff, +0xc0000fffc0000fff, +0x80001fff80001fff, +0x00007fff00007fff, +0x0000fffe0000fffe, +0x0001fffc0001fffc, +0x0003fff80003fff8, +0x0007fff00007fff0, +0x000fffe0000fffe0, +0x001fffc0001fffc0, +0x003fff80003fff80, +0x007fff00007fff00, +0x00fffe0000fffe00, +0x01fffc0001fffc00, +0x03fff80003fff800, +0x07fff00007fff000, +0x0fffe0000fffe000, +0x1fffc0001fffc000, +0x3fff80003fff8000, +0x7fff00007fff0000, +0xfffe0000fffe0000, +0xfffc0001fffc0001, +0xfff80003fff80003, +0xfff00007fff00007, +0xffe0000fffe0000f, +0xffc0001fffc0001f, +0xff80003fff80003f, +0xff00007fff00007f, +0xfe0000fffe0000ff, +0xfc0001fffc0001ff, +0xf80003fff80003ff, +0xf00007fff00007ff, +0xe0000fffe0000fff, +0xc0001fffc0001fff, +0x80003fff80003fff, +0x0000ffff0000ffff, +0x0001fffe0001fffe, +0x0003fffc0003fffc, +0x0007fff80007fff8, +0x000ffff0000ffff0, +0x001fffe0001fffe0, +0x003fffc0003fffc0, +0x007fff80007fff80, +0x00ffff0000ffff00, +0x01fffe0001fffe00, +0x03fffc0003fffc00, +0x07fff80007fff800, +0x0ffff0000ffff000, +0x1fffe0001fffe000, +0x3fffc0003fffc000, +0x7fff80007fff8000, +0xffff0000ffff0000, +0xfffe0001fffe0001, +0xfffc0003fffc0003, +0xfff80007fff80007, +0xfff0000ffff0000f, +0xffe0001fffe0001f, +0xffc0003fffc0003f, +0xff80007fff80007f, +0xff0000ffff0000ff, +0xfe0001fffe0001ff, +0xfc0003fffc0003ff, +0xf80007fff80007ff, +0xf0000ffff0000fff, +0xe0001fffe0001fff, +0xc0003fffc0003fff, +0x80007fff80007fff, +0x0001ffff0001ffff, +0x0003fffe0003fffe, +0x0007fffc0007fffc, +0x000ffff8000ffff8, +0x001ffff0001ffff0, +0x003fffe0003fffe0, +0x007fffc0007fffc0, +0x00ffff8000ffff80, +0x01ffff0001ffff00, +0x03fffe0003fffe00, +0x07fffc0007fffc00, +0x0ffff8000ffff800, +0x1ffff0001ffff000, +0x3fffe0003fffe000, +0x7fffc0007fffc000, +0xffff8000ffff8000, +0xffff0001ffff0001, +0xfffe0003fffe0003, +0xfffc0007fffc0007, +0xfff8000ffff8000f, +0xfff0001ffff0001f, +0xffe0003fffe0003f, +0xffc0007fffc0007f, +0xff8000ffff8000ff, +0xff0001ffff0001ff, +0xfe0003fffe0003ff, +0xfc0007fffc0007ff, +0xf8000ffff8000fff, +0xf0001ffff0001fff, +0xe0003fffe0003fff, +0xc0007fffc0007fff, +0x8000ffff8000ffff, +0x0003ffff0003ffff, +0x0007fffe0007fffe, +0x000ffffc000ffffc, +0x001ffff8001ffff8, +0x003ffff0003ffff0, +0x007fffe0007fffe0, +0x00ffffc000ffffc0, +0x01ffff8001ffff80, +0x03ffff0003ffff00, +0x07fffe0007fffe00, +0x0ffffc000ffffc00, +0x1ffff8001ffff800, +0x3ffff0003ffff000, +0x7fffe0007fffe000, +0xffffc000ffffc000, +0xffff8001ffff8001, +0xffff0003ffff0003, +0xfffe0007fffe0007, +0xfffc000ffffc000f, +0xfff8001ffff8001f, +0xfff0003ffff0003f, +0xffe0007fffe0007f, +0xffc000ffffc000ff, +0xff8001ffff8001ff, +0xff0003ffff0003ff, +0xfe0007fffe0007ff, +0xfc000ffffc000fff, +0xf8001ffff8001fff, +0xf0003ffff0003fff, +0xe0007fffe0007fff, +0xc000ffffc000ffff, +0x8001ffff8001ffff, +0x0007ffff0007ffff, +0x000ffffe000ffffe, +0x001ffffc001ffffc, +0x003ffff8003ffff8, +0x007ffff0007ffff0, +0x00ffffe000ffffe0, +0x01ffffc001ffffc0, +0x03ffff8003ffff80, +0x07ffff0007ffff00, +0x0ffffe000ffffe00, +0x1ffffc001ffffc00, +0x3ffff8003ffff800, +0x7ffff0007ffff000, +0xffffe000ffffe000, +0xffffc001ffffc001, +0xffff8003ffff8003, +0xffff0007ffff0007, +0xfffe000ffffe000f, +0xfffc001ffffc001f, +0xfff8003ffff8003f, +0xfff0007ffff0007f, +0xffe000ffffe000ff, +0xffc001ffffc001ff, +0xff8003ffff8003ff, 
+0xff0007ffff0007ff, +0xfe000ffffe000fff, +0xfc001ffffc001fff, +0xf8003ffff8003fff, +0xf0007ffff0007fff, +0xe000ffffe000ffff, +0xc001ffffc001ffff, +0x8003ffff8003ffff, +0x000fffff000fffff, +0x001ffffe001ffffe, +0x003ffffc003ffffc, +0x007ffff8007ffff8, +0x00fffff000fffff0, +0x01ffffe001ffffe0, +0x03ffffc003ffffc0, +0x07ffff8007ffff80, +0x0fffff000fffff00, +0x1ffffe001ffffe00, +0x3ffffc003ffffc00, +0x7ffff8007ffff800, +0xfffff000fffff000, +0xffffe001ffffe001, +0xffffc003ffffc003, +0xffff8007ffff8007, +0xffff000fffff000f, +0xfffe001ffffe001f, +0xfffc003ffffc003f, +0xfff8007ffff8007f, +0xfff000fffff000ff, +0xffe001ffffe001ff, +0xffc003ffffc003ff, +0xff8007ffff8007ff, +0xff000fffff000fff, +0xfe001ffffe001fff, +0xfc003ffffc003fff, +0xf8007ffff8007fff, +0xf000fffff000ffff, +0xe001ffffe001ffff, +0xc003ffffc003ffff, +0x8007ffff8007ffff, +0x001fffff001fffff, +0x003ffffe003ffffe, +0x007ffffc007ffffc, +0x00fffff800fffff8, +0x01fffff001fffff0, +0x03ffffe003ffffe0, +0x07ffffc007ffffc0, +0x0fffff800fffff80, +0x1fffff001fffff00, +0x3ffffe003ffffe00, +0x7ffffc007ffffc00, +0xfffff800fffff800, +0xfffff001fffff001, +0xffffe003ffffe003, +0xffffc007ffffc007, +0xffff800fffff800f, +0xffff001fffff001f, +0xfffe003ffffe003f, +0xfffc007ffffc007f, +0xfff800fffff800ff, +0xfff001fffff001ff, +0xffe003ffffe003ff, +0xffc007ffffc007ff, +0xff800fffff800fff, +0xff001fffff001fff, +0xfe003ffffe003fff, +0xfc007ffffc007fff, +0xf800fffff800ffff, +0xf001fffff001ffff, +0xe003ffffe003ffff, +0xc007ffffc007ffff, +0x800fffff800fffff, +0x003fffff003fffff, +0x007ffffe007ffffe, +0x00fffffc00fffffc, +0x01fffff801fffff8, +0x03fffff003fffff0, +0x07ffffe007ffffe0, +0x0fffffc00fffffc0, +0x1fffff801fffff80, +0x3fffff003fffff00, +0x7ffffe007ffffe00, +0xfffffc00fffffc00, +0xfffff801fffff801, +0xfffff003fffff003, +0xffffe007ffffe007, +0xffffc00fffffc00f, +0xffff801fffff801f, +0xffff003fffff003f, +0xfffe007ffffe007f, +0xfffc00fffffc00ff, +0xfff801fffff801ff, +0xfff003fffff003ff, +0xffe007ffffe007ff, +0xffc00fffffc00fff, +0xff801fffff801fff, +0xff003fffff003fff, +0xfe007ffffe007fff, +0xfc00fffffc00ffff, +0xf801fffff801ffff, +0xf003fffff003ffff, +0xe007ffffe007ffff, +0xc00fffffc00fffff, +0x801fffff801fffff, +0x007fffff007fffff, +0x00fffffe00fffffe, +0x01fffffc01fffffc, +0x03fffff803fffff8, +0x07fffff007fffff0, +0x0fffffe00fffffe0, +0x1fffffc01fffffc0, +0x3fffff803fffff80, +0x7fffff007fffff00, +0xfffffe00fffffe00, +0xfffffc01fffffc01, +0xfffff803fffff803, +0xfffff007fffff007, +0xffffe00fffffe00f, +0xffffc01fffffc01f, +0xffff803fffff803f, +0xffff007fffff007f, +0xfffe00fffffe00ff, +0xfffc01fffffc01ff, +0xfff803fffff803ff, +0xfff007fffff007ff, +0xffe00fffffe00fff, +0xffc01fffffc01fff, +0xff803fffff803fff, +0xff007fffff007fff, +0xfe00fffffe00ffff, +0xfc01fffffc01ffff, +0xf803fffff803ffff, +0xf007fffff007ffff, +0xe00fffffe00fffff, +0xc01fffffc01fffff, +0x803fffff803fffff, +0x00ffffff00ffffff, +0x01fffffe01fffffe, +0x03fffffc03fffffc, +0x07fffff807fffff8, +0x0ffffff00ffffff0, +0x1fffffe01fffffe0, +0x3fffffc03fffffc0, +0x7fffff807fffff80, +0xffffff00ffffff00, +0xfffffe01fffffe01, +0xfffffc03fffffc03, +0xfffff807fffff807, +0xfffff00ffffff00f, +0xffffe01fffffe01f, +0xffffc03fffffc03f, +0xffff807fffff807f, +0xffff00ffffff00ff, +0xfffe01fffffe01ff, +0xfffc03fffffc03ff, +0xfff807fffff807ff, +0xfff00ffffff00fff, +0xffe01fffffe01fff, +0xffc03fffffc03fff, +0xff807fffff807fff, +0xff00ffffff00ffff, +0xfe01fffffe01ffff, +0xfc03fffffc03ffff, +0xf807fffff807ffff, +0xf00ffffff00fffff, +0xe01fffffe01fffff, +0xc03fffffc03fffff, +0x807fffff807fffff, +0x01ffffff01ffffff, 
+0x03fffffe03fffffe, +0x07fffffc07fffffc, +0x0ffffff80ffffff8, +0x1ffffff01ffffff0, +0x3fffffe03fffffe0, +0x7fffffc07fffffc0, +0xffffff80ffffff80, +0xffffff01ffffff01, +0xfffffe03fffffe03, +0xfffffc07fffffc07, +0xfffff80ffffff80f, +0xfffff01ffffff01f, +0xffffe03fffffe03f, +0xffffc07fffffc07f, +0xffff80ffffff80ff, +0xffff01ffffff01ff, +0xfffe03fffffe03ff, +0xfffc07fffffc07ff, +0xfff80ffffff80fff, +0xfff01ffffff01fff, +0xffe03fffffe03fff, +0xffc07fffffc07fff, +0xff80ffffff80ffff, +0xff01ffffff01ffff, +0xfe03fffffe03ffff, +0xfc07fffffc07ffff, +0xf80ffffff80fffff, +0xf01ffffff01fffff, +0xe03fffffe03fffff, +0xc07fffffc07fffff, +0x80ffffff80ffffff, +0x03ffffff03ffffff, +0x07fffffe07fffffe, +0x0ffffffc0ffffffc, +0x1ffffff81ffffff8, +0x3ffffff03ffffff0, +0x7fffffe07fffffe0, +0xffffffc0ffffffc0, +0xffffff81ffffff81, +0xffffff03ffffff03, +0xfffffe07fffffe07, +0xfffffc0ffffffc0f, +0xfffff81ffffff81f, +0xfffff03ffffff03f, +0xffffe07fffffe07f, +0xffffc0ffffffc0ff, +0xffff81ffffff81ff, +0xffff03ffffff03ff, +0xfffe07fffffe07ff, +0xfffc0ffffffc0fff, +0xfff81ffffff81fff, +0xfff03ffffff03fff, +0xffe07fffffe07fff, +0xffc0ffffffc0ffff, +0xff81ffffff81ffff, +0xff03ffffff03ffff, +0xfe07fffffe07ffff, +0xfc0ffffffc0fffff, +0xf81ffffff81fffff, +0xf03ffffff03fffff, +0xe07fffffe07fffff, +0xc0ffffffc0ffffff, +0x81ffffff81ffffff, +0x07ffffff07ffffff, +0x0ffffffe0ffffffe, +0x1ffffffc1ffffffc, +0x3ffffff83ffffff8, +0x7ffffff07ffffff0, +0xffffffe0ffffffe0, +0xffffffc1ffffffc1, +0xffffff83ffffff83, +0xffffff07ffffff07, +0xfffffe0ffffffe0f, +0xfffffc1ffffffc1f, +0xfffff83ffffff83f, +0xfffff07ffffff07f, +0xffffe0ffffffe0ff, +0xffffc1ffffffc1ff, +0xffff83ffffff83ff, +0xffff07ffffff07ff, +0xfffe0ffffffe0fff, +0xfffc1ffffffc1fff, +0xfff83ffffff83fff, +0xfff07ffffff07fff, +0xffe0ffffffe0ffff, +0xffc1ffffffc1ffff, +0xff83ffffff83ffff, +0xff07ffffff07ffff, +0xfe0ffffffe0fffff, +0xfc1ffffffc1fffff, +0xf83ffffff83fffff, +0xf07ffffff07fffff, +0xe0ffffffe0ffffff, +0xc1ffffffc1ffffff, +0x83ffffff83ffffff, +0x0fffffff0fffffff, +0x1ffffffe1ffffffe, +0x3ffffffc3ffffffc, +0x7ffffff87ffffff8, +0xfffffff0fffffff0, +0xffffffe1ffffffe1, +0xffffffc3ffffffc3, +0xffffff87ffffff87, +0xffffff0fffffff0f, +0xfffffe1ffffffe1f, +0xfffffc3ffffffc3f, +0xfffff87ffffff87f, +0xfffff0fffffff0ff, +0xffffe1ffffffe1ff, +0xffffc3ffffffc3ff, +0xffff87ffffff87ff, +0xffff0fffffff0fff, +0xfffe1ffffffe1fff, +0xfffc3ffffffc3fff, +0xfff87ffffff87fff, +0xfff0fffffff0ffff, +0xffe1ffffffe1ffff, +0xffc3ffffffc3ffff, +0xff87ffffff87ffff, +0xff0fffffff0fffff, +0xfe1ffffffe1fffff, +0xfc3ffffffc3fffff, +0xf87ffffff87fffff, +0xf0fffffff0ffffff, +0xe1ffffffe1ffffff, +0xc3ffffffc3ffffff, +0x87ffffff87ffffff, +0x1fffffff1fffffff, +0x3ffffffe3ffffffe, +0x7ffffffc7ffffffc, +0xfffffff8fffffff8, +0xfffffff1fffffff1, +0xffffffe3ffffffe3, +0xffffffc7ffffffc7, +0xffffff8fffffff8f, +0xffffff1fffffff1f, +0xfffffe3ffffffe3f, +0xfffffc7ffffffc7f, +0xfffff8fffffff8ff, +0xfffff1fffffff1ff, +0xffffe3ffffffe3ff, +0xffffc7ffffffc7ff, +0xffff8fffffff8fff, +0xffff1fffffff1fff, +0xfffe3ffffffe3fff, +0xfffc7ffffffc7fff, +0xfff8fffffff8ffff, +0xfff1fffffff1ffff, +0xffe3ffffffe3ffff, +0xffc7ffffffc7ffff, +0xff8fffffff8fffff, +0xff1fffffff1fffff, +0xfe3ffffffe3fffff, +0xfc7ffffffc7fffff, +0xf8fffffff8ffffff, +0xf1fffffff1ffffff, +0xe3ffffffe3ffffff, +0xc7ffffffc7ffffff, +0x8fffffff8fffffff, +0x3fffffff3fffffff, +0x7ffffffe7ffffffe, +0xfffffffcfffffffc, +0xfffffff9fffffff9, +0xfffffff3fffffff3, +0xffffffe7ffffffe7, +0xffffffcfffffffcf, +0xffffff9fffffff9f, +0xffffff3fffffff3f, +0xfffffe7ffffffe7f, 
+0xfffffcfffffffcff, +0xfffff9fffffff9ff, +0xfffff3fffffff3ff, +0xffffe7ffffffe7ff, +0xffffcfffffffcfff, +0xffff9fffffff9fff, +0xffff3fffffff3fff, +0xfffe7ffffffe7fff, +0xfffcfffffffcffff, +0xfff9fffffff9ffff, +0xfff3fffffff3ffff, +0xffe7ffffffe7ffff, +0xffcfffffffcfffff, +0xff9fffffff9fffff, +0xff3fffffff3fffff, +0xfe7ffffffe7fffff, +0xfcfffffffcffffff, +0xf9fffffff9ffffff, +0xf3fffffff3ffffff, +0xe7ffffffe7ffffff, +0xcfffffffcfffffff, +0x9fffffff9fffffff, +0x7fffffff7fffffff, +0xfffffffefffffffe, +0xfffffffdfffffffd, +0xfffffffbfffffffb, +0xfffffff7fffffff7, +0xffffffefffffffef, +0xffffffdfffffffdf, +0xffffffbfffffffbf, +0xffffff7fffffff7f, +0xfffffefffffffeff, +0xfffffdfffffffdff, +0xfffffbfffffffbff, +0xfffff7fffffff7ff, +0xffffefffffffefff, +0xffffdfffffffdfff, +0xffffbfffffffbfff, +0xffff7fffffff7fff, +0xfffefffffffeffff, +0xfffdfffffffdffff, +0xfffbfffffffbffff, +0xfff7fffffff7ffff, +0xffefffffffefffff, +0xffdfffffffdfffff, +0xffbfffffffbfffff, +0xff7fffffff7fffff, +0xfefffffffeffffff, +0xfdfffffffdffffff, +0xfbfffffffbffffff, +0xf7fffffff7ffffff, +0xefffffffefffffff, +0xdfffffffdfffffff, +0xbfffffffbfffffff, +0x0000000000000001, +0x0000000000000002, +0x0000000000000004, +0x0000000000000008, +0x0000000000000010, +0x0000000000000020, +0x0000000000000040, +0x0000000000000080, +0x0000000000000100, +0x0000000000000200, +0x0000000000000400, +0x0000000000000800, +0x0000000000001000, +0x0000000000002000, +0x0000000000004000, +0x0000000000008000, +0x0000000000010000, +0x0000000000020000, +0x0000000000040000, +0x0000000000080000, +0x0000000000100000, +0x0000000000200000, +0x0000000000400000, +0x0000000000800000, +0x0000000001000000, +0x0000000002000000, +0x0000000004000000, +0x0000000008000000, +0x0000000010000000, +0x0000000020000000, +0x0000000040000000, +0x0000000080000000, +0x0000000100000000, +0x0000000200000000, +0x0000000400000000, +0x0000000800000000, +0x0000001000000000, +0x0000002000000000, +0x0000004000000000, +0x0000008000000000, +0x0000010000000000, +0x0000020000000000, +0x0000040000000000, +0x0000080000000000, +0x0000100000000000, +0x0000200000000000, +0x0000400000000000, +0x0000800000000000, +0x0001000000000000, +0x0002000000000000, +0x0004000000000000, +0x0008000000000000, +0x0010000000000000, +0x0020000000000000, +0x0040000000000000, +0x0080000000000000, +0x0100000000000000, +0x0200000000000000, +0x0400000000000000, +0x0800000000000000, +0x1000000000000000, +0x2000000000000000, +0x4000000000000000, +0x8000000000000000, +0x0000000000000003, +0x0000000000000006, +0x000000000000000c, +0x0000000000000018, +0x0000000000000030, +0x0000000000000060, +0x00000000000000c0, +0x0000000000000180, +0x0000000000000300, +0x0000000000000600, +0x0000000000000c00, +0x0000000000001800, +0x0000000000003000, +0x0000000000006000, +0x000000000000c000, +0x0000000000018000, +0x0000000000030000, +0x0000000000060000, +0x00000000000c0000, +0x0000000000180000, +0x0000000000300000, +0x0000000000600000, +0x0000000000c00000, +0x0000000001800000, +0x0000000003000000, +0x0000000006000000, +0x000000000c000000, +0x0000000018000000, +0x0000000030000000, +0x0000000060000000, +0x00000000c0000000, +0x0000000180000000, +0x0000000300000000, +0x0000000600000000, +0x0000000c00000000, +0x0000001800000000, +0x0000003000000000, +0x0000006000000000, +0x000000c000000000, +0x0000018000000000, +0x0000030000000000, +0x0000060000000000, +0x00000c0000000000, +0x0000180000000000, +0x0000300000000000, +0x0000600000000000, +0x0000c00000000000, +0x0001800000000000, +0x0003000000000000, +0x0006000000000000, +0x000c000000000000, 
+0x0018000000000000, +0x0030000000000000, +0x0060000000000000, +0x00c0000000000000, +0x0180000000000000, +0x0300000000000000, +0x0600000000000000, +0x0c00000000000000, +0x1800000000000000, +0x3000000000000000, +0x6000000000000000, +0xc000000000000000, +0x8000000000000001, +0x0000000000000007, +0x000000000000000e, +0x000000000000001c, +0x0000000000000038, +0x0000000000000070, +0x00000000000000e0, +0x00000000000001c0, +0x0000000000000380, +0x0000000000000700, +0x0000000000000e00, +0x0000000000001c00, +0x0000000000003800, +0x0000000000007000, +0x000000000000e000, +0x000000000001c000, +0x0000000000038000, +0x0000000000070000, +0x00000000000e0000, +0x00000000001c0000, +0x0000000000380000, +0x0000000000700000, +0x0000000000e00000, +0x0000000001c00000, +0x0000000003800000, +0x0000000007000000, +0x000000000e000000, +0x000000001c000000, +0x0000000038000000, +0x0000000070000000, +0x00000000e0000000, +0x00000001c0000000, +0x0000000380000000, +0x0000000700000000, +0x0000000e00000000, +0x0000001c00000000, +0x0000003800000000, +0x0000007000000000, +0x000000e000000000, +0x000001c000000000, +0x0000038000000000, +0x0000070000000000, +0x00000e0000000000, +0x00001c0000000000, +0x0000380000000000, +0x0000700000000000, +0x0000e00000000000, +0x0001c00000000000, +0x0003800000000000, +0x0007000000000000, +0x000e000000000000, +0x001c000000000000, +0x0038000000000000, +0x0070000000000000, +0x00e0000000000000, +0x01c0000000000000, +0x0380000000000000, +0x0700000000000000, +0x0e00000000000000, +0x1c00000000000000, +0x3800000000000000, +0x7000000000000000, +0xe000000000000000, +0xc000000000000001, +0x8000000000000003, +0x000000000000000f, +0x000000000000001e, +0x000000000000003c, +0x0000000000000078, +0x00000000000000f0, +0x00000000000001e0, +0x00000000000003c0, +0x0000000000000780, +0x0000000000000f00, +0x0000000000001e00, +0x0000000000003c00, +0x0000000000007800, +0x000000000000f000, +0x000000000001e000, +0x000000000003c000, +0x0000000000078000, +0x00000000000f0000, +0x00000000001e0000, +0x00000000003c0000, +0x0000000000780000, +0x0000000000f00000, +0x0000000001e00000, +0x0000000003c00000, +0x0000000007800000, +0x000000000f000000, +0x000000001e000000, +0x000000003c000000, +0x0000000078000000, +0x00000000f0000000, +0x00000001e0000000, +0x00000003c0000000, +0x0000000780000000, +0x0000000f00000000, +0x0000001e00000000, +0x0000003c00000000, +0x0000007800000000, +0x000000f000000000, +0x000001e000000000, +0x000003c000000000, +0x0000078000000000, +0x00000f0000000000, +0x00001e0000000000, +0x00003c0000000000, +0x0000780000000000, +0x0000f00000000000, +0x0001e00000000000, +0x0003c00000000000, +0x0007800000000000, +0x000f000000000000, +0x001e000000000000, +0x003c000000000000, +0x0078000000000000, +0x00f0000000000000, +0x01e0000000000000, +0x03c0000000000000, +0x0780000000000000, +0x0f00000000000000, +0x1e00000000000000, +0x3c00000000000000, +0x7800000000000000, +0xf000000000000000, +0xe000000000000001, +0xc000000000000003, +0x8000000000000007, +0x000000000000001f, +0x000000000000003e, +0x000000000000007c, +0x00000000000000f8, +0x00000000000001f0, +0x00000000000003e0, +0x00000000000007c0, +0x0000000000000f80, +0x0000000000001f00, +0x0000000000003e00, +0x0000000000007c00, +0x000000000000f800, +0x000000000001f000, +0x000000000003e000, +0x000000000007c000, +0x00000000000f8000, +0x00000000001f0000, +0x00000000003e0000, +0x00000000007c0000, +0x0000000000f80000, +0x0000000001f00000, +0x0000000003e00000, +0x0000000007c00000, +0x000000000f800000, +0x000000001f000000, +0x000000003e000000, +0x000000007c000000, +0x00000000f8000000, 
+0x00000001f0000000, +0x00000003e0000000, +0x00000007c0000000, +0x0000000f80000000, +0x0000001f00000000, +0x0000003e00000000, +0x0000007c00000000, +0x000000f800000000, +0x000001f000000000, +0x000003e000000000, +0x000007c000000000, +0x00000f8000000000, +0x00001f0000000000, +0x00003e0000000000, +0x00007c0000000000, +0x0000f80000000000, +0x0001f00000000000, +0x0003e00000000000, +0x0007c00000000000, +0x000f800000000000, +0x001f000000000000, +0x003e000000000000, +0x007c000000000000, +0x00f8000000000000, +0x01f0000000000000, +0x03e0000000000000, +0x07c0000000000000, +0x0f80000000000000, +0x1f00000000000000, +0x3e00000000000000, +0x7c00000000000000, +0xf800000000000000, +0xf000000000000001, +0xe000000000000003, +0xc000000000000007, +0x800000000000000f, +0x000000000000003f, +0x000000000000007e, +0x00000000000000fc, +0x00000000000001f8, +0x00000000000003f0, +0x00000000000007e0, +0x0000000000000fc0, +0x0000000000001f80, +0x0000000000003f00, +0x0000000000007e00, +0x000000000000fc00, +0x000000000001f800, +0x000000000003f000, +0x000000000007e000, +0x00000000000fc000, +0x00000000001f8000, +0x00000000003f0000, +0x00000000007e0000, +0x0000000000fc0000, +0x0000000001f80000, +0x0000000003f00000, +0x0000000007e00000, +0x000000000fc00000, +0x000000001f800000, +0x000000003f000000, +0x000000007e000000, +0x00000000fc000000, +0x00000001f8000000, +0x00000003f0000000, +0x00000007e0000000, +0x0000000fc0000000, +0x0000001f80000000, +0x0000003f00000000, +0x0000007e00000000, +0x000000fc00000000, +0x000001f800000000, +0x000003f000000000, +0x000007e000000000, +0x00000fc000000000, +0x00001f8000000000, +0x00003f0000000000, +0x00007e0000000000, +0x0000fc0000000000, +0x0001f80000000000, +0x0003f00000000000, +0x0007e00000000000, +0x000fc00000000000, +0x001f800000000000, +0x003f000000000000, +0x007e000000000000, +0x00fc000000000000, +0x01f8000000000000, +0x03f0000000000000, +0x07e0000000000000, +0x0fc0000000000000, +0x1f80000000000000, +0x3f00000000000000, +0x7e00000000000000, +0xfc00000000000000, +0xf800000000000001, +0xf000000000000003, +0xe000000000000007, +0xc00000000000000f, +0x800000000000001f, +0x000000000000007f, +0x00000000000000fe, +0x00000000000001fc, +0x00000000000003f8, +0x00000000000007f0, +0x0000000000000fe0, +0x0000000000001fc0, +0x0000000000003f80, +0x0000000000007f00, +0x000000000000fe00, +0x000000000001fc00, +0x000000000003f800, +0x000000000007f000, +0x00000000000fe000, +0x00000000001fc000, +0x00000000003f8000, +0x00000000007f0000, +0x0000000000fe0000, +0x0000000001fc0000, +0x0000000003f80000, +0x0000000007f00000, +0x000000000fe00000, +0x000000001fc00000, +0x000000003f800000, +0x000000007f000000, +0x00000000fe000000, +0x00000001fc000000, +0x00000003f8000000, +0x00000007f0000000, +0x0000000fe0000000, +0x0000001fc0000000, +0x0000003f80000000, +0x0000007f00000000, +0x000000fe00000000, +0x000001fc00000000, +0x000003f800000000, +0x000007f000000000, +0x00000fe000000000, +0x00001fc000000000, +0x00003f8000000000, +0x00007f0000000000, +0x0000fe0000000000, +0x0001fc0000000000, +0x0003f80000000000, +0x0007f00000000000, +0x000fe00000000000, +0x001fc00000000000, +0x003f800000000000, +0x007f000000000000, +0x00fe000000000000, +0x01fc000000000000, +0x03f8000000000000, +0x07f0000000000000, +0x0fe0000000000000, +0x1fc0000000000000, +0x3f80000000000000, +0x7f00000000000000, +0xfe00000000000000, +0xfc00000000000001, +0xf800000000000003, +0xf000000000000007, +0xe00000000000000f, +0xc00000000000001f, +0x800000000000003f, +0x00000000000000ff, +0x00000000000001fe, +0x00000000000003fc, +0x00000000000007f8, +0x0000000000000ff0, 
+0x0000000000001fe0, +0x0000000000003fc0, +0x0000000000007f80, +0x000000000000ff00, +0x000000000001fe00, +0x000000000003fc00, +0x000000000007f800, +0x00000000000ff000, +0x00000000001fe000, +0x00000000003fc000, +0x00000000007f8000, +0x0000000000ff0000, +0x0000000001fe0000, +0x0000000003fc0000, +0x0000000007f80000, +0x000000000ff00000, +0x000000001fe00000, +0x000000003fc00000, +0x000000007f800000, +0x00000000ff000000, +0x00000001fe000000, +0x00000003fc000000, +0x00000007f8000000, +0x0000000ff0000000, +0x0000001fe0000000, +0x0000003fc0000000, +0x0000007f80000000, +0x000000ff00000000, +0x000001fe00000000, +0x000003fc00000000, +0x000007f800000000, +0x00000ff000000000, +0x00001fe000000000, +0x00003fc000000000, +0x00007f8000000000, +0x0000ff0000000000, +0x0001fe0000000000, +0x0003fc0000000000, +0x0007f80000000000, +0x000ff00000000000, +0x001fe00000000000, +0x003fc00000000000, +0x007f800000000000, +0x00ff000000000000, +0x01fe000000000000, +0x03fc000000000000, +0x07f8000000000000, +0x0ff0000000000000, +0x1fe0000000000000, +0x3fc0000000000000, +0x7f80000000000000, +0xff00000000000000, +0xfe00000000000001, +0xfc00000000000003, +0xf800000000000007, +0xf00000000000000f, +0xe00000000000001f, +0xc00000000000003f, +0x800000000000007f, +0x00000000000001ff, +0x00000000000003fe, +0x00000000000007fc, +0x0000000000000ff8, +0x0000000000001ff0, +0x0000000000003fe0, +0x0000000000007fc0, +0x000000000000ff80, +0x000000000001ff00, +0x000000000003fe00, +0x000000000007fc00, +0x00000000000ff800, +0x00000000001ff000, +0x00000000003fe000, +0x00000000007fc000, +0x0000000000ff8000, +0x0000000001ff0000, +0x0000000003fe0000, +0x0000000007fc0000, +0x000000000ff80000, +0x000000001ff00000, +0x000000003fe00000, +0x000000007fc00000, +0x00000000ff800000, +0x00000001ff000000, +0x00000003fe000000, +0x00000007fc000000, +0x0000000ff8000000, +0x0000001ff0000000, +0x0000003fe0000000, +0x0000007fc0000000, +0x000000ff80000000, +0x000001ff00000000, +0x000003fe00000000, +0x000007fc00000000, +0x00000ff800000000, +0x00001ff000000000, +0x00003fe000000000, +0x00007fc000000000, +0x0000ff8000000000, +0x0001ff0000000000, +0x0003fe0000000000, +0x0007fc0000000000, +0x000ff80000000000, +0x001ff00000000000, +0x003fe00000000000, +0x007fc00000000000, +0x00ff800000000000, +0x01ff000000000000, +0x03fe000000000000, +0x07fc000000000000, +0x0ff8000000000000, +0x1ff0000000000000, +0x3fe0000000000000, +0x7fc0000000000000, +0xff80000000000000, +0xff00000000000001, +0xfe00000000000003, +0xfc00000000000007, +0xf80000000000000f, +0xf00000000000001f, +0xe00000000000003f, +0xc00000000000007f, +0x80000000000000ff, +0x00000000000003ff, +0x00000000000007fe, +0x0000000000000ffc, +0x0000000000001ff8, +0x0000000000003ff0, +0x0000000000007fe0, +0x000000000000ffc0, +0x000000000001ff80, +0x000000000003ff00, +0x000000000007fe00, +0x00000000000ffc00, +0x00000000001ff800, +0x00000000003ff000, +0x00000000007fe000, +0x0000000000ffc000, +0x0000000001ff8000, +0x0000000003ff0000, +0x0000000007fe0000, +0x000000000ffc0000, +0x000000001ff80000, +0x000000003ff00000, +0x000000007fe00000, +0x00000000ffc00000, +0x00000001ff800000, +0x00000003ff000000, +0x00000007fe000000, +0x0000000ffc000000, +0x0000001ff8000000, +0x0000003ff0000000, +0x0000007fe0000000, +0x000000ffc0000000, +0x000001ff80000000, +0x000003ff00000000, +0x000007fe00000000, +0x00000ffc00000000, +0x00001ff800000000, +0x00003ff000000000, +0x00007fe000000000, +0x0000ffc000000000, +0x0001ff8000000000, +0x0003ff0000000000, +0x0007fe0000000000, +0x000ffc0000000000, +0x001ff80000000000, +0x003ff00000000000, +0x007fe00000000000, 
+0x00ffc00000000000, +0x01ff800000000000, +0x03ff000000000000, +0x07fe000000000000, +0x0ffc000000000000, +0x1ff8000000000000, +0x3ff0000000000000, +0x7fe0000000000000, +0xffc0000000000000, +0xff80000000000001, +0xff00000000000003, +0xfe00000000000007, +0xfc0000000000000f, +0xf80000000000001f, +0xf00000000000003f, +0xe00000000000007f, +0xc0000000000000ff, +0x80000000000001ff, +0x00000000000007ff, +0x0000000000000ffe, +0x0000000000001ffc, +0x0000000000003ff8, +0x0000000000007ff0, +0x000000000000ffe0, +0x000000000001ffc0, +0x000000000003ff80, +0x000000000007ff00, +0x00000000000ffe00, +0x00000000001ffc00, +0x00000000003ff800, +0x00000000007ff000, +0x0000000000ffe000, +0x0000000001ffc000, +0x0000000003ff8000, +0x0000000007ff0000, +0x000000000ffe0000, +0x000000001ffc0000, +0x000000003ff80000, +0x000000007ff00000, +0x00000000ffe00000, +0x00000001ffc00000, +0x00000003ff800000, +0x00000007ff000000, +0x0000000ffe000000, +0x0000001ffc000000, +0x0000003ff8000000, +0x0000007ff0000000, +0x000000ffe0000000, +0x000001ffc0000000, +0x000003ff80000000, +0x000007ff00000000, +0x00000ffe00000000, +0x00001ffc00000000, +0x00003ff800000000, +0x00007ff000000000, +0x0000ffe000000000, +0x0001ffc000000000, +0x0003ff8000000000, +0x0007ff0000000000, +0x000ffe0000000000, +0x001ffc0000000000, +0x003ff80000000000, +0x007ff00000000000, +0x00ffe00000000000, +0x01ffc00000000000, +0x03ff800000000000, +0x07ff000000000000, +0x0ffe000000000000, +0x1ffc000000000000, +0x3ff8000000000000, +0x7ff0000000000000, +0xffe0000000000000, +0xffc0000000000001, +0xff80000000000003, +0xff00000000000007, +0xfe0000000000000f, +0xfc0000000000001f, +0xf80000000000003f, +0xf00000000000007f, +0xe0000000000000ff, +0xc0000000000001ff, +0x80000000000003ff, +0x0000000000000fff, +0x0000000000001ffe, +0x0000000000003ffc, +0x0000000000007ff8, +0x000000000000fff0, +0x000000000001ffe0, +0x000000000003ffc0, +0x000000000007ff80, +0x00000000000fff00, +0x00000000001ffe00, +0x00000000003ffc00, +0x00000000007ff800, +0x0000000000fff000, +0x0000000001ffe000, +0x0000000003ffc000, +0x0000000007ff8000, +0x000000000fff0000, +0x000000001ffe0000, +0x000000003ffc0000, +0x000000007ff80000, +0x00000000fff00000, +0x00000001ffe00000, +0x00000003ffc00000, +0x00000007ff800000, +0x0000000fff000000, +0x0000001ffe000000, +0x0000003ffc000000, +0x0000007ff8000000, +0x000000fff0000000, +0x000001ffe0000000, +0x000003ffc0000000, +0x000007ff80000000, +0x00000fff00000000, +0x00001ffe00000000, +0x00003ffc00000000, +0x00007ff800000000, +0x0000fff000000000, +0x0001ffe000000000, +0x0003ffc000000000, +0x0007ff8000000000, +0x000fff0000000000, +0x001ffe0000000000, +0x003ffc0000000000, +0x007ff80000000000, +0x00fff00000000000, +0x01ffe00000000000, +0x03ffc00000000000, +0x07ff800000000000, +0x0fff000000000000, +0x1ffe000000000000, +0x3ffc000000000000, +0x7ff8000000000000, +0xfff0000000000000, +0xffe0000000000001, +0xffc0000000000003, +0xff80000000000007, +0xff0000000000000f, +0xfe0000000000001f, +0xfc0000000000003f, +0xf80000000000007f, +0xf0000000000000ff, +0xe0000000000001ff, +0xc0000000000003ff, +0x80000000000007ff, +0x0000000000001fff, +0x0000000000003ffe, +0x0000000000007ffc, +0x000000000000fff8, +0x000000000001fff0, +0x000000000003ffe0, +0x000000000007ffc0, +0x00000000000fff80, +0x00000000001fff00, +0x00000000003ffe00, +0x00000000007ffc00, +0x0000000000fff800, +0x0000000001fff000, +0x0000000003ffe000, +0x0000000007ffc000, +0x000000000fff8000, +0x000000001fff0000, +0x000000003ffe0000, +0x000000007ffc0000, +0x00000000fff80000, +0x00000001fff00000, +0x00000003ffe00000, +0x00000007ffc00000, 
+0x0000000fff800000, +0x0000001fff000000, +0x0000003ffe000000, +0x0000007ffc000000, +0x000000fff8000000, +0x000001fff0000000, +0x000003ffe0000000, +0x000007ffc0000000, +0x00000fff80000000, +0x00001fff00000000, +0x00003ffe00000000, +0x00007ffc00000000, +0x0000fff800000000, +0x0001fff000000000, +0x0003ffe000000000, +0x0007ffc000000000, +0x000fff8000000000, +0x001fff0000000000, +0x003ffe0000000000, +0x007ffc0000000000, +0x00fff80000000000, +0x01fff00000000000, +0x03ffe00000000000, +0x07ffc00000000000, +0x0fff800000000000, +0x1fff000000000000, +0x3ffe000000000000, +0x7ffc000000000000, +0xfff8000000000000, +0xfff0000000000001, +0xffe0000000000003, +0xffc0000000000007, +0xff8000000000000f, +0xff0000000000001f, +0xfe0000000000003f, +0xfc0000000000007f, +0xf8000000000000ff, +0xf0000000000001ff, +0xe0000000000003ff, +0xc0000000000007ff, +0x8000000000000fff, +0x0000000000003fff, +0x0000000000007ffe, +0x000000000000fffc, +0x000000000001fff8, +0x000000000003fff0, +0x000000000007ffe0, +0x00000000000fffc0, +0x00000000001fff80, +0x00000000003fff00, +0x00000000007ffe00, +0x0000000000fffc00, +0x0000000001fff800, +0x0000000003fff000, +0x0000000007ffe000, +0x000000000fffc000, +0x000000001fff8000, +0x000000003fff0000, +0x000000007ffe0000, +0x00000000fffc0000, +0x00000001fff80000, +0x00000003fff00000, +0x00000007ffe00000, +0x0000000fffc00000, +0x0000001fff800000, +0x0000003fff000000, +0x0000007ffe000000, +0x000000fffc000000, +0x000001fff8000000, +0x000003fff0000000, +0x000007ffe0000000, +0x00000fffc0000000, +0x00001fff80000000, +0x00003fff00000000, +0x00007ffe00000000, +0x0000fffc00000000, +0x0001fff800000000, +0x0003fff000000000, +0x0007ffe000000000, +0x000fffc000000000, +0x001fff8000000000, +0x003fff0000000000, +0x007ffe0000000000, +0x00fffc0000000000, +0x01fff80000000000, +0x03fff00000000000, +0x07ffe00000000000, +0x0fffc00000000000, +0x1fff800000000000, +0x3fff000000000000, +0x7ffe000000000000, +0xfffc000000000000, +0xfff8000000000001, +0xfff0000000000003, +0xffe0000000000007, +0xffc000000000000f, +0xff8000000000001f, +0xff0000000000003f, +0xfe0000000000007f, +0xfc000000000000ff, +0xf8000000000001ff, +0xf0000000000003ff, +0xe0000000000007ff, +0xc000000000000fff, +0x8000000000001fff, +0x0000000000007fff, +0x000000000000fffe, +0x000000000001fffc, +0x000000000003fff8, +0x000000000007fff0, +0x00000000000fffe0, +0x00000000001fffc0, +0x00000000003fff80, +0x00000000007fff00, +0x0000000000fffe00, +0x0000000001fffc00, +0x0000000003fff800, +0x0000000007fff000, +0x000000000fffe000, +0x000000001fffc000, +0x000000003fff8000, +0x000000007fff0000, +0x00000000fffe0000, +0x00000001fffc0000, +0x00000003fff80000, +0x00000007fff00000, +0x0000000fffe00000, +0x0000001fffc00000, +0x0000003fff800000, +0x0000007fff000000, +0x000000fffe000000, +0x000001fffc000000, +0x000003fff8000000, +0x000007fff0000000, +0x00000fffe0000000, +0x00001fffc0000000, +0x00003fff80000000, +0x00007fff00000000, +0x0000fffe00000000, +0x0001fffc00000000, +0x0003fff800000000, +0x0007fff000000000, +0x000fffe000000000, +0x001fffc000000000, +0x003fff8000000000, +0x007fff0000000000, +0x00fffe0000000000, +0x01fffc0000000000, +0x03fff80000000000, +0x07fff00000000000, +0x0fffe00000000000, +0x1fffc00000000000, +0x3fff800000000000, +0x7fff000000000000, +0xfffe000000000000, +0xfffc000000000001, +0xfff8000000000003, +0xfff0000000000007, +0xffe000000000000f, +0xffc000000000001f, +0xff8000000000003f, +0xff0000000000007f, +0xfe000000000000ff, +0xfc000000000001ff, +0xf8000000000003ff, +0xf0000000000007ff, +0xe000000000000fff, +0xc000000000001fff, +0x8000000000003fff, 
+0x000000000000ffff, +0x000000000001fffe, +0x000000000003fffc, +0x000000000007fff8, +0x00000000000ffff0, +0x00000000001fffe0, +0x00000000003fffc0, +0x00000000007fff80, +0x0000000000ffff00, +0x0000000001fffe00, +0x0000000003fffc00, +0x0000000007fff800, +0x000000000ffff000, +0x000000001fffe000, +0x000000003fffc000, +0x000000007fff8000, +0x00000000ffff0000, +0x00000001fffe0000, +0x00000003fffc0000, +0x00000007fff80000, +0x0000000ffff00000, +0x0000001fffe00000, +0x0000003fffc00000, +0x0000007fff800000, +0x000000ffff000000, +0x000001fffe000000, +0x000003fffc000000, +0x000007fff8000000, +0x00000ffff0000000, +0x00001fffe0000000, +0x00003fffc0000000, +0x00007fff80000000, +0x0000ffff00000000, +0x0001fffe00000000, +0x0003fffc00000000, +0x0007fff800000000, +0x000ffff000000000, +0x001fffe000000000, +0x003fffc000000000, +0x007fff8000000000, +0x00ffff0000000000, +0x01fffe0000000000, +0x03fffc0000000000, +0x07fff80000000000, +0x0ffff00000000000, +0x1fffe00000000000, +0x3fffc00000000000, +0x7fff800000000000, +0xffff000000000000, +0xfffe000000000001, +0xfffc000000000003, +0xfff8000000000007, +0xfff000000000000f, +0xffe000000000001f, +0xffc000000000003f, +0xff8000000000007f, +0xff000000000000ff, +0xfe000000000001ff, +0xfc000000000003ff, +0xf8000000000007ff, +0xf000000000000fff, +0xe000000000001fff, +0xc000000000003fff, +0x8000000000007fff, +0x000000000001ffff, +0x000000000003fffe, +0x000000000007fffc, +0x00000000000ffff8, +0x00000000001ffff0, +0x00000000003fffe0, +0x00000000007fffc0, +0x0000000000ffff80, +0x0000000001ffff00, +0x0000000003fffe00, +0x0000000007fffc00, +0x000000000ffff800, +0x000000001ffff000, +0x000000003fffe000, +0x000000007fffc000, +0x00000000ffff8000, +0x00000001ffff0000, +0x00000003fffe0000, +0x00000007fffc0000, +0x0000000ffff80000, +0x0000001ffff00000, +0x0000003fffe00000, +0x0000007fffc00000, +0x000000ffff800000, +0x000001ffff000000, +0x000003fffe000000, +0x000007fffc000000, +0x00000ffff8000000, +0x00001ffff0000000, +0x00003fffe0000000, +0x00007fffc0000000, +0x0000ffff80000000, +0x0001ffff00000000, +0x0003fffe00000000, +0x0007fffc00000000, +0x000ffff800000000, +0x001ffff000000000, +0x003fffe000000000, +0x007fffc000000000, +0x00ffff8000000000, +0x01ffff0000000000, +0x03fffe0000000000, +0x07fffc0000000000, +0x0ffff80000000000, +0x1ffff00000000000, +0x3fffe00000000000, +0x7fffc00000000000, +0xffff800000000000, +0xffff000000000001, +0xfffe000000000003, +0xfffc000000000007, +0xfff800000000000f, +0xfff000000000001f, +0xffe000000000003f, +0xffc000000000007f, +0xff800000000000ff, +0xff000000000001ff, +0xfe000000000003ff, +0xfc000000000007ff, +0xf800000000000fff, +0xf000000000001fff, +0xe000000000003fff, +0xc000000000007fff, +0x800000000000ffff, +0x000000000003ffff, +0x000000000007fffe, +0x00000000000ffffc, +0x00000000001ffff8, +0x00000000003ffff0, +0x00000000007fffe0, +0x0000000000ffffc0, +0x0000000001ffff80, +0x0000000003ffff00, +0x0000000007fffe00, +0x000000000ffffc00, +0x000000001ffff800, +0x000000003ffff000, +0x000000007fffe000, +0x00000000ffffc000, +0x00000001ffff8000, +0x00000003ffff0000, +0x00000007fffe0000, +0x0000000ffffc0000, +0x0000001ffff80000, +0x0000003ffff00000, +0x0000007fffe00000, +0x000000ffffc00000, +0x000001ffff800000, +0x000003ffff000000, +0x000007fffe000000, +0x00000ffffc000000, +0x00001ffff8000000, +0x00003ffff0000000, +0x00007fffe0000000, +0x0000ffffc0000000, +0x0001ffff80000000, +0x0003ffff00000000, +0x0007fffe00000000, +0x000ffffc00000000, +0x001ffff800000000, +0x003ffff000000000, +0x007fffe000000000, +0x00ffffc000000000, +0x01ffff8000000000, +0x03ffff0000000000, 
+0x07fffe0000000000, +0x0ffffc0000000000, +0x1ffff80000000000, +0x3ffff00000000000, +0x7fffe00000000000, +0xffffc00000000000, +0xffff800000000001, +0xffff000000000003, +0xfffe000000000007, +0xfffc00000000000f, +0xfff800000000001f, +0xfff000000000003f, +0xffe000000000007f, +0xffc00000000000ff, +0xff800000000001ff, +0xff000000000003ff, +0xfe000000000007ff, +0xfc00000000000fff, +0xf800000000001fff, +0xf000000000003fff, +0xe000000000007fff, +0xc00000000000ffff, +0x800000000001ffff, +0x000000000007ffff, +0x00000000000ffffe, +0x00000000001ffffc, +0x00000000003ffff8, +0x00000000007ffff0, +0x0000000000ffffe0, +0x0000000001ffffc0, +0x0000000003ffff80, +0x0000000007ffff00, +0x000000000ffffe00, +0x000000001ffffc00, +0x000000003ffff800, +0x000000007ffff000, +0x00000000ffffe000, +0x00000001ffffc000, +0x00000003ffff8000, +0x00000007ffff0000, +0x0000000ffffe0000, +0x0000001ffffc0000, +0x0000003ffff80000, +0x0000007ffff00000, +0x000000ffffe00000, +0x000001ffffc00000, +0x000003ffff800000, +0x000007ffff000000, +0x00000ffffe000000, +0x00001ffffc000000, +0x00003ffff8000000, +0x00007ffff0000000, +0x0000ffffe0000000, +0x0001ffffc0000000, +0x0003ffff80000000, +0x0007ffff00000000, +0x000ffffe00000000, +0x001ffffc00000000, +0x003ffff800000000, +0x007ffff000000000, +0x00ffffe000000000, +0x01ffffc000000000, +0x03ffff8000000000, +0x07ffff0000000000, +0x0ffffe0000000000, +0x1ffffc0000000000, +0x3ffff80000000000, +0x7ffff00000000000, +0xffffe00000000000, +0xffffc00000000001, +0xffff800000000003, +0xffff000000000007, +0xfffe00000000000f, +0xfffc00000000001f, +0xfff800000000003f, +0xfff000000000007f, +0xffe00000000000ff, +0xffc00000000001ff, +0xff800000000003ff, +0xff000000000007ff, +0xfe00000000000fff, +0xfc00000000001fff, +0xf800000000003fff, +0xf000000000007fff, +0xe00000000000ffff, +0xc00000000001ffff, +0x800000000003ffff, +0x00000000000fffff, +0x00000000001ffffe, +0x00000000003ffffc, +0x00000000007ffff8, +0x0000000000fffff0, +0x0000000001ffffe0, +0x0000000003ffffc0, +0x0000000007ffff80, +0x000000000fffff00, +0x000000001ffffe00, +0x000000003ffffc00, +0x000000007ffff800, +0x00000000fffff000, +0x00000001ffffe000, +0x00000003ffffc000, +0x00000007ffff8000, +0x0000000fffff0000, +0x0000001ffffe0000, +0x0000003ffffc0000, +0x0000007ffff80000, +0x000000fffff00000, +0x000001ffffe00000, +0x000003ffffc00000, +0x000007ffff800000, +0x00000fffff000000, +0x00001ffffe000000, +0x00003ffffc000000, +0x00007ffff8000000, +0x0000fffff0000000, +0x0001ffffe0000000, +0x0003ffffc0000000, +0x0007ffff80000000, +0x000fffff00000000, +0x001ffffe00000000, +0x003ffffc00000000, +0x007ffff800000000, +0x00fffff000000000, +0x01ffffe000000000, +0x03ffffc000000000, +0x07ffff8000000000, +0x0fffff0000000000, +0x1ffffe0000000000, +0x3ffffc0000000000, +0x7ffff80000000000, +0xfffff00000000000, +0xffffe00000000001, +0xffffc00000000003, +0xffff800000000007, +0xffff00000000000f, +0xfffe00000000001f, +0xfffc00000000003f, +0xfff800000000007f, +0xfff00000000000ff, +0xffe00000000001ff, +0xffc00000000003ff, +0xff800000000007ff, +0xff00000000000fff, +0xfe00000000001fff, +0xfc00000000003fff, +0xf800000000007fff, +0xf00000000000ffff, +0xe00000000001ffff, +0xc00000000003ffff, +0x800000000007ffff, +0x00000000001fffff, +0x00000000003ffffe, +0x00000000007ffffc, +0x0000000000fffff8, +0x0000000001fffff0, +0x0000000003ffffe0, +0x0000000007ffffc0, +0x000000000fffff80, +0x000000001fffff00, +0x000000003ffffe00, +0x000000007ffffc00, +0x00000000fffff800, +0x00000001fffff000, +0x00000003ffffe000, +0x00000007ffffc000, +0x0000000fffff8000, +0x0000001fffff0000, +0x0000003ffffe0000, 
+0x0000007ffffc0000, +0x000000fffff80000, +0x000001fffff00000, +0x000003ffffe00000, +0x000007ffffc00000, +0x00000fffff800000, +0x00001fffff000000, +0x00003ffffe000000, +0x00007ffffc000000, +0x0000fffff8000000, +0x0001fffff0000000, +0x0003ffffe0000000, +0x0007ffffc0000000, +0x000fffff80000000, +0x001fffff00000000, +0x003ffffe00000000, +0x007ffffc00000000, +0x00fffff800000000, +0x01fffff000000000, +0x03ffffe000000000, +0x07ffffc000000000, +0x0fffff8000000000, +0x1fffff0000000000, +0x3ffffe0000000000, +0x7ffffc0000000000, +0xfffff80000000000, +0xfffff00000000001, +0xffffe00000000003, +0xffffc00000000007, +0xffff80000000000f, +0xffff00000000001f, +0xfffe00000000003f, +0xfffc00000000007f, +0xfff80000000000ff, +0xfff00000000001ff, +0xffe00000000003ff, +0xffc00000000007ff, +0xff80000000000fff, +0xff00000000001fff, +0xfe00000000003fff, +0xfc00000000007fff, +0xf80000000000ffff, +0xf00000000001ffff, +0xe00000000003ffff, +0xc00000000007ffff, +0x80000000000fffff, +0x00000000003fffff, +0x00000000007ffffe, +0x0000000000fffffc, +0x0000000001fffff8, +0x0000000003fffff0, +0x0000000007ffffe0, +0x000000000fffffc0, +0x000000001fffff80, +0x000000003fffff00, +0x000000007ffffe00, +0x00000000fffffc00, +0x00000001fffff800, +0x00000003fffff000, +0x00000007ffffe000, +0x0000000fffffc000, +0x0000001fffff8000, +0x0000003fffff0000, +0x0000007ffffe0000, +0x000000fffffc0000, +0x000001fffff80000, +0x000003fffff00000, +0x000007ffffe00000, +0x00000fffffc00000, +0x00001fffff800000, +0x00003fffff000000, +0x00007ffffe000000, +0x0000fffffc000000, +0x0001fffff8000000, +0x0003fffff0000000, +0x0007ffffe0000000, +0x000fffffc0000000, +0x001fffff80000000, +0x003fffff00000000, +0x007ffffe00000000, +0x00fffffc00000000, +0x01fffff800000000, +0x03fffff000000000, +0x07ffffe000000000, +0x0fffffc000000000, +0x1fffff8000000000, +0x3fffff0000000000, +0x7ffffe0000000000, +0xfffffc0000000000, +0xfffff80000000001, +0xfffff00000000003, +0xffffe00000000007, +0xffffc0000000000f, +0xffff80000000001f, +0xffff00000000003f, +0xfffe00000000007f, +0xfffc0000000000ff, +0xfff80000000001ff, +0xfff00000000003ff, +0xffe00000000007ff, +0xffc0000000000fff, +0xff80000000001fff, +0xff00000000003fff, +0xfe00000000007fff, +0xfc0000000000ffff, +0xf80000000001ffff, +0xf00000000003ffff, +0xe00000000007ffff, +0xc0000000000fffff, +0x80000000001fffff, +0x00000000007fffff, +0x0000000000fffffe, +0x0000000001fffffc, +0x0000000003fffff8, +0x0000000007fffff0, +0x000000000fffffe0, +0x000000001fffffc0, +0x000000003fffff80, +0x000000007fffff00, +0x00000000fffffe00, +0x00000001fffffc00, +0x00000003fffff800, +0x00000007fffff000, +0x0000000fffffe000, +0x0000001fffffc000, +0x0000003fffff8000, +0x0000007fffff0000, +0x000000fffffe0000, +0x000001fffffc0000, +0x000003fffff80000, +0x000007fffff00000, +0x00000fffffe00000, +0x00001fffffc00000, +0x00003fffff800000, +0x00007fffff000000, +0x0000fffffe000000, +0x0001fffffc000000, +0x0003fffff8000000, +0x0007fffff0000000, +0x000fffffe0000000, +0x001fffffc0000000, +0x003fffff80000000, +0x007fffff00000000, +0x00fffffe00000000, +0x01fffffc00000000, +0x03fffff800000000, +0x07fffff000000000, +0x0fffffe000000000, +0x1fffffc000000000, +0x3fffff8000000000, +0x7fffff0000000000, +0xfffffe0000000000, +0xfffffc0000000001, +0xfffff80000000003, +0xfffff00000000007, +0xffffe0000000000f, +0xffffc0000000001f, +0xffff80000000003f, +0xffff00000000007f, +0xfffe0000000000ff, +0xfffc0000000001ff, +0xfff80000000003ff, +0xfff00000000007ff, +0xffe0000000000fff, +0xffc0000000001fff, +0xff80000000003fff, +0xff00000000007fff, +0xfe0000000000ffff, +0xfc0000000001ffff, 
+0xf80000000003ffff, +0xf00000000007ffff, +0xe0000000000fffff, +0xc0000000001fffff, +0x80000000003fffff, +0x0000000000ffffff, +0x0000000001fffffe, +0x0000000003fffffc, +0x0000000007fffff8, +0x000000000ffffff0, +0x000000001fffffe0, +0x000000003fffffc0, +0x000000007fffff80, +0x00000000ffffff00, +0x00000001fffffe00, +0x00000003fffffc00, +0x00000007fffff800, +0x0000000ffffff000, +0x0000001fffffe000, +0x0000003fffffc000, +0x0000007fffff8000, +0x000000ffffff0000, +0x000001fffffe0000, +0x000003fffffc0000, +0x000007fffff80000, +0x00000ffffff00000, +0x00001fffffe00000, +0x00003fffffc00000, +0x00007fffff800000, +0x0000ffffff000000, +0x0001fffffe000000, +0x0003fffffc000000, +0x0007fffff8000000, +0x000ffffff0000000, +0x001fffffe0000000, +0x003fffffc0000000, +0x007fffff80000000, +0x00ffffff00000000, +0x01fffffe00000000, +0x03fffffc00000000, +0x07fffff800000000, +0x0ffffff000000000, +0x1fffffe000000000, +0x3fffffc000000000, +0x7fffff8000000000, +0xffffff0000000000, +0xfffffe0000000001, +0xfffffc0000000003, +0xfffff80000000007, +0xfffff0000000000f, +0xffffe0000000001f, +0xffffc0000000003f, +0xffff80000000007f, +0xffff0000000000ff, +0xfffe0000000001ff, +0xfffc0000000003ff, +0xfff80000000007ff, +0xfff0000000000fff, +0xffe0000000001fff, +0xffc0000000003fff, +0xff80000000007fff, +0xff0000000000ffff, +0xfe0000000001ffff, +0xfc0000000003ffff, +0xf80000000007ffff, +0xf0000000000fffff, +0xe0000000001fffff, +0xc0000000003fffff, +0x80000000007fffff, +0x0000000001ffffff, +0x0000000003fffffe, +0x0000000007fffffc, +0x000000000ffffff8, +0x000000001ffffff0, +0x000000003fffffe0, +0x000000007fffffc0, +0x00000000ffffff80, +0x00000001ffffff00, +0x00000003fffffe00, +0x00000007fffffc00, +0x0000000ffffff800, +0x0000001ffffff000, +0x0000003fffffe000, +0x0000007fffffc000, +0x000000ffffff8000, +0x000001ffffff0000, +0x000003fffffe0000, +0x000007fffffc0000, +0x00000ffffff80000, +0x00001ffffff00000, +0x00003fffffe00000, +0x00007fffffc00000, +0x0000ffffff800000, +0x0001ffffff000000, +0x0003fffffe000000, +0x0007fffffc000000, +0x000ffffff8000000, +0x001ffffff0000000, +0x003fffffe0000000, +0x007fffffc0000000, +0x00ffffff80000000, +0x01ffffff00000000, +0x03fffffe00000000, +0x07fffffc00000000, +0x0ffffff800000000, +0x1ffffff000000000, +0x3fffffe000000000, +0x7fffffc000000000, +0xffffff8000000000, +0xffffff0000000001, +0xfffffe0000000003, +0xfffffc0000000007, +0xfffff8000000000f, +0xfffff0000000001f, +0xffffe0000000003f, +0xffffc0000000007f, +0xffff8000000000ff, +0xffff0000000001ff, +0xfffe0000000003ff, +0xfffc0000000007ff, +0xfff8000000000fff, +0xfff0000000001fff, +0xffe0000000003fff, +0xffc0000000007fff, +0xff8000000000ffff, +0xff0000000001ffff, +0xfe0000000003ffff, +0xfc0000000007ffff, +0xf8000000000fffff, +0xf0000000001fffff, +0xe0000000003fffff, +0xc0000000007fffff, +0x8000000000ffffff, +0x0000000003ffffff, +0x0000000007fffffe, +0x000000000ffffffc, +0x000000001ffffff8, +0x000000003ffffff0, +0x000000007fffffe0, +0x00000000ffffffc0, +0x00000001ffffff80, +0x00000003ffffff00, +0x00000007fffffe00, +0x0000000ffffffc00, +0x0000001ffffff800, +0x0000003ffffff000, +0x0000007fffffe000, +0x000000ffffffc000, +0x000001ffffff8000, +0x000003ffffff0000, +0x000007fffffe0000, +0x00000ffffffc0000, +0x00001ffffff80000, +0x00003ffffff00000, +0x00007fffffe00000, +0x0000ffffffc00000, +0x0001ffffff800000, +0x0003ffffff000000, +0x0007fffffe000000, +0x000ffffffc000000, +0x001ffffff8000000, +0x003ffffff0000000, +0x007fffffe0000000, +0x00ffffffc0000000, +0x01ffffff80000000, +0x03ffffff00000000, +0x07fffffe00000000, +0x0ffffffc00000000, +0x1ffffff800000000, 
+0x3ffffff000000000, +0x7fffffe000000000, +0xffffffc000000000, +0xffffff8000000001, +0xffffff0000000003, +0xfffffe0000000007, +0xfffffc000000000f, +0xfffff8000000001f, +0xfffff0000000003f, +0xffffe0000000007f, +0xffffc000000000ff, +0xffff8000000001ff, +0xffff0000000003ff, +0xfffe0000000007ff, +0xfffc000000000fff, +0xfff8000000001fff, +0xfff0000000003fff, +0xffe0000000007fff, +0xffc000000000ffff, +0xff8000000001ffff, +0xff0000000003ffff, +0xfe0000000007ffff, +0xfc000000000fffff, +0xf8000000001fffff, +0xf0000000003fffff, +0xe0000000007fffff, +0xc000000000ffffff, +0x8000000001ffffff, +0x0000000007ffffff, +0x000000000ffffffe, +0x000000001ffffffc, +0x000000003ffffff8, +0x000000007ffffff0, +0x00000000ffffffe0, +0x00000001ffffffc0, +0x00000003ffffff80, +0x00000007ffffff00, +0x0000000ffffffe00, +0x0000001ffffffc00, +0x0000003ffffff800, +0x0000007ffffff000, +0x000000ffffffe000, +0x000001ffffffc000, +0x000003ffffff8000, +0x000007ffffff0000, +0x00000ffffffe0000, +0x00001ffffffc0000, +0x00003ffffff80000, +0x00007ffffff00000, +0x0000ffffffe00000, +0x0001ffffffc00000, +0x0003ffffff800000, +0x0007ffffff000000, +0x000ffffffe000000, +0x001ffffffc000000, +0x003ffffff8000000, +0x007ffffff0000000, +0x00ffffffe0000000, +0x01ffffffc0000000, +0x03ffffff80000000, +0x07ffffff00000000, +0x0ffffffe00000000, +0x1ffffffc00000000, +0x3ffffff800000000, +0x7ffffff000000000, +0xffffffe000000000, +0xffffffc000000001, +0xffffff8000000003, +0xffffff0000000007, +0xfffffe000000000f, +0xfffffc000000001f, +0xfffff8000000003f, +0xfffff0000000007f, +0xffffe000000000ff, +0xffffc000000001ff, +0xffff8000000003ff, +0xffff0000000007ff, +0xfffe000000000fff, +0xfffc000000001fff, +0xfff8000000003fff, +0xfff0000000007fff, +0xffe000000000ffff, +0xffc000000001ffff, +0xff8000000003ffff, +0xff0000000007ffff, +0xfe000000000fffff, +0xfc000000001fffff, +0xf8000000003fffff, +0xf0000000007fffff, +0xe000000000ffffff, +0xc000000001ffffff, +0x8000000003ffffff, +0x000000000fffffff, +0x000000001ffffffe, +0x000000003ffffffc, +0x000000007ffffff8, +0x00000000fffffff0, +0x00000001ffffffe0, +0x00000003ffffffc0, +0x00000007ffffff80, +0x0000000fffffff00, +0x0000001ffffffe00, +0x0000003ffffffc00, +0x0000007ffffff800, +0x000000fffffff000, +0x000001ffffffe000, +0x000003ffffffc000, +0x000007ffffff8000, +0x00000fffffff0000, +0x00001ffffffe0000, +0x00003ffffffc0000, +0x00007ffffff80000, +0x0000fffffff00000, +0x0001ffffffe00000, +0x0003ffffffc00000, +0x0007ffffff800000, +0x000fffffff000000, +0x001ffffffe000000, +0x003ffffffc000000, +0x007ffffff8000000, +0x00fffffff0000000, +0x01ffffffe0000000, +0x03ffffffc0000000, +0x07ffffff80000000, +0x0fffffff00000000, +0x1ffffffe00000000, +0x3ffffffc00000000, +0x7ffffff800000000, +0xfffffff000000000, +0xffffffe000000001, +0xffffffc000000003, +0xffffff8000000007, +0xffffff000000000f, +0xfffffe000000001f, +0xfffffc000000003f, +0xfffff8000000007f, +0xfffff000000000ff, +0xffffe000000001ff, +0xffffc000000003ff, +0xffff8000000007ff, +0xffff000000000fff, +0xfffe000000001fff, +0xfffc000000003fff, +0xfff8000000007fff, +0xfff000000000ffff, +0xffe000000001ffff, +0xffc000000003ffff, +0xff8000000007ffff, +0xff000000000fffff, +0xfe000000001fffff, +0xfc000000003fffff, +0xf8000000007fffff, +0xf000000000ffffff, +0xe000000001ffffff, +0xc000000003ffffff, +0x8000000007ffffff, +0x000000001fffffff, +0x000000003ffffffe, +0x000000007ffffffc, +0x00000000fffffff8, +0x00000001fffffff0, +0x00000003ffffffe0, +0x00000007ffffffc0, +0x0000000fffffff80, +0x0000001fffffff00, +0x0000003ffffffe00, +0x0000007ffffffc00, +0x000000fffffff800, +0x000001fffffff000, 
+0x000003ffffffe000, +0x000007ffffffc000, +0x00000fffffff8000, +0x00001fffffff0000, +0x00003ffffffe0000, +0x00007ffffffc0000, +0x0000fffffff80000, +0x0001fffffff00000, +0x0003ffffffe00000, +0x0007ffffffc00000, +0x000fffffff800000, +0x001fffffff000000, +0x003ffffffe000000, +0x007ffffffc000000, +0x00fffffff8000000, +0x01fffffff0000000, +0x03ffffffe0000000, +0x07ffffffc0000000, +0x0fffffff80000000, +0x1fffffff00000000, +0x3ffffffe00000000, +0x7ffffffc00000000, +0xfffffff800000000, +0xfffffff000000001, +0xffffffe000000003, +0xffffffc000000007, +0xffffff800000000f, +0xffffff000000001f, +0xfffffe000000003f, +0xfffffc000000007f, +0xfffff800000000ff, +0xfffff000000001ff, +0xffffe000000003ff, +0xffffc000000007ff, +0xffff800000000fff, +0xffff000000001fff, +0xfffe000000003fff, +0xfffc000000007fff, +0xfff800000000ffff, +0xfff000000001ffff, +0xffe000000003ffff, +0xffc000000007ffff, +0xff800000000fffff, +0xff000000001fffff, +0xfe000000003fffff, +0xfc000000007fffff, +0xf800000000ffffff, +0xf000000001ffffff, +0xe000000003ffffff, +0xc000000007ffffff, +0x800000000fffffff, +0x000000003fffffff, +0x000000007ffffffe, +0x00000000fffffffc, +0x00000001fffffff8, +0x00000003fffffff0, +0x00000007ffffffe0, +0x0000000fffffffc0, +0x0000001fffffff80, +0x0000003fffffff00, +0x0000007ffffffe00, +0x000000fffffffc00, +0x000001fffffff800, +0x000003fffffff000, +0x000007ffffffe000, +0x00000fffffffc000, +0x00001fffffff8000, +0x00003fffffff0000, +0x00007ffffffe0000, +0x0000fffffffc0000, +0x0001fffffff80000, +0x0003fffffff00000, +0x0007ffffffe00000, +0x000fffffffc00000, +0x001fffffff800000, +0x003fffffff000000, +0x007ffffffe000000, +0x00fffffffc000000, +0x01fffffff8000000, +0x03fffffff0000000, +0x07ffffffe0000000, +0x0fffffffc0000000, +0x1fffffff80000000, +0x3fffffff00000000, +0x7ffffffe00000000, +0xfffffffc00000000, +0xfffffff800000001, +0xfffffff000000003, +0xffffffe000000007, +0xffffffc00000000f, +0xffffff800000001f, +0xffffff000000003f, +0xfffffe000000007f, +0xfffffc00000000ff, +0xfffff800000001ff, +0xfffff000000003ff, +0xffffe000000007ff, +0xffffc00000000fff, +0xffff800000001fff, +0xffff000000003fff, +0xfffe000000007fff, +0xfffc00000000ffff, +0xfff800000001ffff, +0xfff000000003ffff, +0xffe000000007ffff, +0xffc00000000fffff, +0xff800000001fffff, +0xff000000003fffff, +0xfe000000007fffff, +0xfc00000000ffffff, +0xf800000001ffffff, +0xf000000003ffffff, +0xe000000007ffffff, +0xc00000000fffffff, +0x800000001fffffff, +0x000000007fffffff, +0x00000000fffffffe, +0x00000001fffffffc, +0x00000003fffffff8, +0x00000007fffffff0, +0x0000000fffffffe0, +0x0000001fffffffc0, +0x0000003fffffff80, +0x0000007fffffff00, +0x000000fffffffe00, +0x000001fffffffc00, +0x000003fffffff800, +0x000007fffffff000, +0x00000fffffffe000, +0x00001fffffffc000, +0x00003fffffff8000, +0x00007fffffff0000, +0x0000fffffffe0000, +0x0001fffffffc0000, +0x0003fffffff80000, +0x0007fffffff00000, +0x000fffffffe00000, +0x001fffffffc00000, +0x003fffffff800000, +0x007fffffff000000, +0x00fffffffe000000, +0x01fffffffc000000, +0x03fffffff8000000, +0x07fffffff0000000, +0x0fffffffe0000000, +0x1fffffffc0000000, +0x3fffffff80000000, +0x7fffffff00000000, +0xfffffffe00000000, +0xfffffffc00000001, +0xfffffff800000003, +0xfffffff000000007, +0xffffffe00000000f, +0xffffffc00000001f, +0xffffff800000003f, +0xffffff000000007f, +0xfffffe00000000ff, +0xfffffc00000001ff, +0xfffff800000003ff, +0xfffff000000007ff, +0xffffe00000000fff, +0xffffc00000001fff, +0xffff800000003fff, +0xffff000000007fff, +0xfffe00000000ffff, +0xfffc00000001ffff, +0xfff800000003ffff, +0xfff000000007ffff, +0xffe00000000fffff, 
+0xffc00000001fffff, +0xff800000003fffff, +0xff000000007fffff, +0xfe00000000ffffff, +0xfc00000001ffffff, +0xf800000003ffffff, +0xf000000007ffffff, +0xe00000000fffffff, +0xc00000001fffffff, +0x800000003fffffff, +0x00000000ffffffff, +0x00000001fffffffe, +0x00000003fffffffc, +0x00000007fffffff8, +0x0000000ffffffff0, +0x0000001fffffffe0, +0x0000003fffffffc0, +0x0000007fffffff80, +0x000000ffffffff00, +0x000001fffffffe00, +0x000003fffffffc00, +0x000007fffffff800, +0x00000ffffffff000, +0x00001fffffffe000, +0x00003fffffffc000, +0x00007fffffff8000, +0x0000ffffffff0000, +0x0001fffffffe0000, +0x0003fffffffc0000, +0x0007fffffff80000, +0x000ffffffff00000, +0x001fffffffe00000, +0x003fffffffc00000, +0x007fffffff800000, +0x00ffffffff000000, +0x01fffffffe000000, +0x03fffffffc000000, +0x07fffffff8000000, +0x0ffffffff0000000, +0x1fffffffe0000000, +0x3fffffffc0000000, +0x7fffffff80000000, +0xffffffff00000000, +0xfffffffe00000001, +0xfffffffc00000003, +0xfffffff800000007, +0xfffffff00000000f, +0xffffffe00000001f, +0xffffffc00000003f, +0xffffff800000007f, +0xffffff00000000ff, +0xfffffe00000001ff, +0xfffffc00000003ff, +0xfffff800000007ff, +0xfffff00000000fff, +0xffffe00000001fff, +0xffffc00000003fff, +0xffff800000007fff, +0xffff00000000ffff, +0xfffe00000001ffff, +0xfffc00000003ffff, +0xfff800000007ffff, +0xfff00000000fffff, +0xffe00000001fffff, +0xffc00000003fffff, +0xff800000007fffff, +0xff00000000ffffff, +0xfe00000001ffffff, +0xfc00000003ffffff, +0xf800000007ffffff, +0xf00000000fffffff, +0xe00000001fffffff, +0xc00000003fffffff, +0x800000007fffffff, +0x00000001ffffffff, +0x00000003fffffffe, +0x00000007fffffffc, +0x0000000ffffffff8, +0x0000001ffffffff0, +0x0000003fffffffe0, +0x0000007fffffffc0, +0x000000ffffffff80, +0x000001ffffffff00, +0x000003fffffffe00, +0x000007fffffffc00, +0x00000ffffffff800, +0x00001ffffffff000, +0x00003fffffffe000, +0x00007fffffffc000, +0x0000ffffffff8000, +0x0001ffffffff0000, +0x0003fffffffe0000, +0x0007fffffffc0000, +0x000ffffffff80000, +0x001ffffffff00000, +0x003fffffffe00000, +0x007fffffffc00000, +0x00ffffffff800000, +0x01ffffffff000000, +0x03fffffffe000000, +0x07fffffffc000000, +0x0ffffffff8000000, +0x1ffffffff0000000, +0x3fffffffe0000000, +0x7fffffffc0000000, +0xffffffff80000000, +0xffffffff00000001, +0xfffffffe00000003, +0xfffffffc00000007, +0xfffffff80000000f, +0xfffffff00000001f, +0xffffffe00000003f, +0xffffffc00000007f, +0xffffff80000000ff, +0xffffff00000001ff, +0xfffffe00000003ff, +0xfffffc00000007ff, +0xfffff80000000fff, +0xfffff00000001fff, +0xffffe00000003fff, +0xffffc00000007fff, +0xffff80000000ffff, +0xffff00000001ffff, +0xfffe00000003ffff, +0xfffc00000007ffff, +0xfff80000000fffff, +0xfff00000001fffff, +0xffe00000003fffff, +0xffc00000007fffff, +0xff80000000ffffff, +0xff00000001ffffff, +0xfe00000003ffffff, +0xfc00000007ffffff, +0xf80000000fffffff, +0xf00000001fffffff, +0xe00000003fffffff, +0xc00000007fffffff, +0x80000000ffffffff, +0x00000003ffffffff, +0x00000007fffffffe, +0x0000000ffffffffc, +0x0000001ffffffff8, +0x0000003ffffffff0, +0x0000007fffffffe0, +0x000000ffffffffc0, +0x000001ffffffff80, +0x000003ffffffff00, +0x000007fffffffe00, +0x00000ffffffffc00, +0x00001ffffffff800, +0x00003ffffffff000, +0x00007fffffffe000, +0x0000ffffffffc000, +0x0001ffffffff8000, +0x0003ffffffff0000, +0x0007fffffffe0000, +0x000ffffffffc0000, +0x001ffffffff80000, +0x003ffffffff00000, +0x007fffffffe00000, +0x00ffffffffc00000, +0x01ffffffff800000, +0x03ffffffff000000, +0x07fffffffe000000, +0x0ffffffffc000000, +0x1ffffffff8000000, +0x3ffffffff0000000, +0x7fffffffe0000000, +0xffffffffc0000000, 
+0xffffffff80000001, +0xffffffff00000003, +0xfffffffe00000007, +0xfffffffc0000000f, +0xfffffff80000001f, +0xfffffff00000003f, +0xffffffe00000007f, +0xffffffc0000000ff, +0xffffff80000001ff, +0xffffff00000003ff, +0xfffffe00000007ff, +0xfffffc0000000fff, +0xfffff80000001fff, +0xfffff00000003fff, +0xffffe00000007fff, +0xffffc0000000ffff, +0xffff80000001ffff, +0xffff00000003ffff, +0xfffe00000007ffff, +0xfffc0000000fffff, +0xfff80000001fffff, +0xfff00000003fffff, +0xffe00000007fffff, +0xffc0000000ffffff, +0xff80000001ffffff, +0xff00000003ffffff, +0xfe00000007ffffff, +0xfc0000000fffffff, +0xf80000001fffffff, +0xf00000003fffffff, +0xe00000007fffffff, +0xc0000000ffffffff, +0x80000001ffffffff, +0x00000007ffffffff, +0x0000000ffffffffe, +0x0000001ffffffffc, +0x0000003ffffffff8, +0x0000007ffffffff0, +0x000000ffffffffe0, +0x000001ffffffffc0, +0x000003ffffffff80, +0x000007ffffffff00, +0x00000ffffffffe00, +0x00001ffffffffc00, +0x00003ffffffff800, +0x00007ffffffff000, +0x0000ffffffffe000, +0x0001ffffffffc000, +0x0003ffffffff8000, +0x0007ffffffff0000, +0x000ffffffffe0000, +0x001ffffffffc0000, +0x003ffffffff80000, +0x007ffffffff00000, +0x00ffffffffe00000, +0x01ffffffffc00000, +0x03ffffffff800000, +0x07ffffffff000000, +0x0ffffffffe000000, +0x1ffffffffc000000, +0x3ffffffff8000000, +0x7ffffffff0000000, +0xffffffffe0000000, +0xffffffffc0000001, +0xffffffff80000003, +0xffffffff00000007, +0xfffffffe0000000f, +0xfffffffc0000001f, +0xfffffff80000003f, +0xfffffff00000007f, +0xffffffe0000000ff, +0xffffffc0000001ff, +0xffffff80000003ff, +0xffffff00000007ff, +0xfffffe0000000fff, +0xfffffc0000001fff, +0xfffff80000003fff, +0xfffff00000007fff, +0xffffe0000000ffff, +0xffffc0000001ffff, +0xffff80000003ffff, +0xffff00000007ffff, +0xfffe0000000fffff, +0xfffc0000001fffff, +0xfff80000003fffff, +0xfff00000007fffff, +0xffe0000000ffffff, +0xffc0000001ffffff, +0xff80000003ffffff, +0xff00000007ffffff, +0xfe0000000fffffff, +0xfc0000001fffffff, +0xf80000003fffffff, +0xf00000007fffffff, +0xe0000000ffffffff, +0xc0000001ffffffff, +0x80000003ffffffff, +0x0000000fffffffff, +0x0000001ffffffffe, +0x0000003ffffffffc, +0x0000007ffffffff8, +0x000000fffffffff0, +0x000001ffffffffe0, +0x000003ffffffffc0, +0x000007ffffffff80, +0x00000fffffffff00, +0x00001ffffffffe00, +0x00003ffffffffc00, +0x00007ffffffff800, +0x0000fffffffff000, +0x0001ffffffffe000, +0x0003ffffffffc000, +0x0007ffffffff8000, +0x000fffffffff0000, +0x001ffffffffe0000, +0x003ffffffffc0000, +0x007ffffffff80000, +0x00fffffffff00000, +0x01ffffffffe00000, +0x03ffffffffc00000, +0x07ffffffff800000, +0x0fffffffff000000, +0x1ffffffffe000000, +0x3ffffffffc000000, +0x7ffffffff8000000, +0xfffffffff0000000, +0xffffffffe0000001, +0xffffffffc0000003, +0xffffffff80000007, +0xffffffff0000000f, +0xfffffffe0000001f, +0xfffffffc0000003f, +0xfffffff80000007f, +0xfffffff0000000ff, +0xffffffe0000001ff, +0xffffffc0000003ff, +0xffffff80000007ff, +0xffffff0000000fff, +0xfffffe0000001fff, +0xfffffc0000003fff, +0xfffff80000007fff, +0xfffff0000000ffff, +0xffffe0000001ffff, +0xffffc0000003ffff, +0xffff80000007ffff, +0xffff0000000fffff, +0xfffe0000001fffff, +0xfffc0000003fffff, +0xfff80000007fffff, +0xfff0000000ffffff, +0xffe0000001ffffff, +0xffc0000003ffffff, +0xff80000007ffffff, +0xff0000000fffffff, +0xfe0000001fffffff, +0xfc0000003fffffff, +0xf80000007fffffff, +0xf0000000ffffffff, +0xe0000001ffffffff, +0xc0000003ffffffff, +0x80000007ffffffff, +0x0000001fffffffff, +0x0000003ffffffffe, +0x0000007ffffffffc, +0x000000fffffffff8, +0x000001fffffffff0, +0x000003ffffffffe0, +0x000007ffffffffc0, +0x00000fffffffff80, 
+0x00001fffffffff00, +0x00003ffffffffe00, +0x00007ffffffffc00, +0x0000fffffffff800, +0x0001fffffffff000, +0x0003ffffffffe000, +0x0007ffffffffc000, +0x000fffffffff8000, +0x001fffffffff0000, +0x003ffffffffe0000, +0x007ffffffffc0000, +0x00fffffffff80000, +0x01fffffffff00000, +0x03ffffffffe00000, +0x07ffffffffc00000, +0x0fffffffff800000, +0x1fffffffff000000, +0x3ffffffffe000000, +0x7ffffffffc000000, +0xfffffffff8000000, +0xfffffffff0000001, +0xffffffffe0000003, +0xffffffffc0000007, +0xffffffff8000000f, +0xffffffff0000001f, +0xfffffffe0000003f, +0xfffffffc0000007f, +0xfffffff8000000ff, +0xfffffff0000001ff, +0xffffffe0000003ff, +0xffffffc0000007ff, +0xffffff8000000fff, +0xffffff0000001fff, +0xfffffe0000003fff, +0xfffffc0000007fff, +0xfffff8000000ffff, +0xfffff0000001ffff, +0xffffe0000003ffff, +0xffffc0000007ffff, +0xffff8000000fffff, +0xffff0000001fffff, +0xfffe0000003fffff, +0xfffc0000007fffff, +0xfff8000000ffffff, +0xfff0000001ffffff, +0xffe0000003ffffff, +0xffc0000007ffffff, +0xff8000000fffffff, +0xff0000001fffffff, +0xfe0000003fffffff, +0xfc0000007fffffff, +0xf8000000ffffffff, +0xf0000001ffffffff, +0xe0000003ffffffff, +0xc0000007ffffffff, +0x8000000fffffffff, +0x0000003fffffffff, +0x0000007ffffffffe, +0x000000fffffffffc, +0x000001fffffffff8, +0x000003fffffffff0, +0x000007ffffffffe0, +0x00000fffffffffc0, +0x00001fffffffff80, +0x00003fffffffff00, +0x00007ffffffffe00, +0x0000fffffffffc00, +0x0001fffffffff800, +0x0003fffffffff000, +0x0007ffffffffe000, +0x000fffffffffc000, +0x001fffffffff8000, +0x003fffffffff0000, +0x007ffffffffe0000, +0x00fffffffffc0000, +0x01fffffffff80000, +0x03fffffffff00000, +0x07ffffffffe00000, +0x0fffffffffc00000, +0x1fffffffff800000, +0x3fffffffff000000, +0x7ffffffffe000000, +0xfffffffffc000000, +0xfffffffff8000001, +0xfffffffff0000003, +0xffffffffe0000007, +0xffffffffc000000f, +0xffffffff8000001f, +0xffffffff0000003f, +0xfffffffe0000007f, +0xfffffffc000000ff, +0xfffffff8000001ff, +0xfffffff0000003ff, +0xffffffe0000007ff, +0xffffffc000000fff, +0xffffff8000001fff, +0xffffff0000003fff, +0xfffffe0000007fff, +0xfffffc000000ffff, +0xfffff8000001ffff, +0xfffff0000003ffff, +0xffffe0000007ffff, +0xffffc000000fffff, +0xffff8000001fffff, +0xffff0000003fffff, +0xfffe0000007fffff, +0xfffc000000ffffff, +0xfff8000001ffffff, +0xfff0000003ffffff, +0xffe0000007ffffff, +0xffc000000fffffff, +0xff8000001fffffff, +0xff0000003fffffff, +0xfe0000007fffffff, +0xfc000000ffffffff, +0xf8000001ffffffff, +0xf0000003ffffffff, +0xe0000007ffffffff, +0xc000000fffffffff, +0x8000001fffffffff, +0x0000007fffffffff, +0x000000fffffffffe, +0x000001fffffffffc, +0x000003fffffffff8, +0x000007fffffffff0, +0x00000fffffffffe0, +0x00001fffffffffc0, +0x00003fffffffff80, +0x00007fffffffff00, +0x0000fffffffffe00, +0x0001fffffffffc00, +0x0003fffffffff800, +0x0007fffffffff000, +0x000fffffffffe000, +0x001fffffffffc000, +0x003fffffffff8000, +0x007fffffffff0000, +0x00fffffffffe0000, +0x01fffffffffc0000, +0x03fffffffff80000, +0x07fffffffff00000, +0x0fffffffffe00000, +0x1fffffffffc00000, +0x3fffffffff800000, +0x7fffffffff000000, +0xfffffffffe000000, +0xfffffffffc000001, +0xfffffffff8000003, +0xfffffffff0000007, +0xffffffffe000000f, +0xffffffffc000001f, +0xffffffff8000003f, +0xffffffff0000007f, +0xfffffffe000000ff, +0xfffffffc000001ff, +0xfffffff8000003ff, +0xfffffff0000007ff, +0xffffffe000000fff, +0xffffffc000001fff, +0xffffff8000003fff, +0xffffff0000007fff, +0xfffffe000000ffff, +0xfffffc000001ffff, +0xfffff8000003ffff, +0xfffff0000007ffff, +0xffffe000000fffff, +0xffffc000001fffff, +0xffff8000003fffff, +0xffff0000007fffff, 
+0xfffe000000ffffff, +0xfffc000001ffffff, +0xfff8000003ffffff, +0xfff0000007ffffff, +0xffe000000fffffff, +0xffc000001fffffff, +0xff8000003fffffff, +0xff0000007fffffff, +0xfe000000ffffffff, +0xfc000001ffffffff, +0xf8000003ffffffff, +0xf0000007ffffffff, +0xe000000fffffffff, +0xc000001fffffffff, +0x8000003fffffffff, +0x000000ffffffffff, +0x000001fffffffffe, +0x000003fffffffffc, +0x000007fffffffff8, +0x00000ffffffffff0, +0x00001fffffffffe0, +0x00003fffffffffc0, +0x00007fffffffff80, +0x0000ffffffffff00, +0x0001fffffffffe00, +0x0003fffffffffc00, +0x0007fffffffff800, +0x000ffffffffff000, +0x001fffffffffe000, +0x003fffffffffc000, +0x007fffffffff8000, +0x00ffffffffff0000, +0x01fffffffffe0000, +0x03fffffffffc0000, +0x07fffffffff80000, +0x0ffffffffff00000, +0x1fffffffffe00000, +0x3fffffffffc00000, +0x7fffffffff800000, +0xffffffffff000000, +0xfffffffffe000001, +0xfffffffffc000003, +0xfffffffff8000007, +0xfffffffff000000f, +0xffffffffe000001f, +0xffffffffc000003f, +0xffffffff8000007f, +0xffffffff000000ff, +0xfffffffe000001ff, +0xfffffffc000003ff, +0xfffffff8000007ff, +0xfffffff000000fff, +0xffffffe000001fff, +0xffffffc000003fff, +0xffffff8000007fff, +0xffffff000000ffff, +0xfffffe000001ffff, +0xfffffc000003ffff, +0xfffff8000007ffff, +0xfffff000000fffff, +0xffffe000001fffff, +0xffffc000003fffff, +0xffff8000007fffff, +0xffff000000ffffff, +0xfffe000001ffffff, +0xfffc000003ffffff, +0xfff8000007ffffff, +0xfff000000fffffff, +0xffe000001fffffff, +0xffc000003fffffff, +0xff8000007fffffff, +0xff000000ffffffff, +0xfe000001ffffffff, +0xfc000003ffffffff, +0xf8000007ffffffff, +0xf000000fffffffff, +0xe000001fffffffff, +0xc000003fffffffff, +0x8000007fffffffff, +0x000001ffffffffff, +0x000003fffffffffe, +0x000007fffffffffc, +0x00000ffffffffff8, +0x00001ffffffffff0, +0x00003fffffffffe0, +0x00007fffffffffc0, +0x0000ffffffffff80, +0x0001ffffffffff00, +0x0003fffffffffe00, +0x0007fffffffffc00, +0x000ffffffffff800, +0x001ffffffffff000, +0x003fffffffffe000, +0x007fffffffffc000, +0x00ffffffffff8000, +0x01ffffffffff0000, +0x03fffffffffe0000, +0x07fffffffffc0000, +0x0ffffffffff80000, +0x1ffffffffff00000, +0x3fffffffffe00000, +0x7fffffffffc00000, +0xffffffffff800000, +0xffffffffff000001, +0xfffffffffe000003, +0xfffffffffc000007, +0xfffffffff800000f, +0xfffffffff000001f, +0xffffffffe000003f, +0xffffffffc000007f, +0xffffffff800000ff, +0xffffffff000001ff, +0xfffffffe000003ff, +0xfffffffc000007ff, +0xfffffff800000fff, +0xfffffff000001fff, +0xffffffe000003fff, +0xffffffc000007fff, +0xffffff800000ffff, +0xffffff000001ffff, +0xfffffe000003ffff, +0xfffffc000007ffff, +0xfffff800000fffff, +0xfffff000001fffff, +0xffffe000003fffff, +0xffffc000007fffff, +0xffff800000ffffff, +0xffff000001ffffff, +0xfffe000003ffffff, +0xfffc000007ffffff, +0xfff800000fffffff, +0xfff000001fffffff, +0xffe000003fffffff, +0xffc000007fffffff, +0xff800000ffffffff, +0xff000001ffffffff, +0xfe000003ffffffff, +0xfc000007ffffffff, +0xf800000fffffffff, +0xf000001fffffffff, +0xe000003fffffffff, +0xc000007fffffffff, +0x800000ffffffffff, +0x000003ffffffffff, +0x000007fffffffffe, +0x00000ffffffffffc, +0x00001ffffffffff8, +0x00003ffffffffff0, +0x00007fffffffffe0, +0x0000ffffffffffc0, +0x0001ffffffffff80, +0x0003ffffffffff00, +0x0007fffffffffe00, +0x000ffffffffffc00, +0x001ffffffffff800, +0x003ffffffffff000, +0x007fffffffffe000, +0x00ffffffffffc000, +0x01ffffffffff8000, +0x03ffffffffff0000, +0x07fffffffffe0000, +0x0ffffffffffc0000, +0x1ffffffffff80000, +0x3ffffffffff00000, +0x7fffffffffe00000, +0xffffffffffc00000, +0xffffffffff800001, +0xffffffffff000003, +0xfffffffffe000007, 
+0xfffffffffc00000f, +0xfffffffff800001f, +0xfffffffff000003f, +0xffffffffe000007f, +0xffffffffc00000ff, +0xffffffff800001ff, +0xffffffff000003ff, +0xfffffffe000007ff, +0xfffffffc00000fff, +0xfffffff800001fff, +0xfffffff000003fff, +0xffffffe000007fff, +0xffffffc00000ffff, +0xffffff800001ffff, +0xffffff000003ffff, +0xfffffe000007ffff, +0xfffffc00000fffff, +0xfffff800001fffff, +0xfffff000003fffff, +0xffffe000007fffff, +0xffffc00000ffffff, +0xffff800001ffffff, +0xffff000003ffffff, +0xfffe000007ffffff, +0xfffc00000fffffff, +0xfff800001fffffff, +0xfff000003fffffff, +0xffe000007fffffff, +0xffc00000ffffffff, +0xff800001ffffffff, +0xff000003ffffffff, +0xfe000007ffffffff, +0xfc00000fffffffff, +0xf800001fffffffff, +0xf000003fffffffff, +0xe000007fffffffff, +0xc00000ffffffffff, +0x800001ffffffffff, +0x000007ffffffffff, +0x00000ffffffffffe, +0x00001ffffffffffc, +0x00003ffffffffff8, +0x00007ffffffffff0, +0x0000ffffffffffe0, +0x0001ffffffffffc0, +0x0003ffffffffff80, +0x0007ffffffffff00, +0x000ffffffffffe00, +0x001ffffffffffc00, +0x003ffffffffff800, +0x007ffffffffff000, +0x00ffffffffffe000, +0x01ffffffffffc000, +0x03ffffffffff8000, +0x07ffffffffff0000, +0x0ffffffffffe0000, +0x1ffffffffffc0000, +0x3ffffffffff80000, +0x7ffffffffff00000, +0xffffffffffe00000, +0xffffffffffc00001, +0xffffffffff800003, +0xffffffffff000007, +0xfffffffffe00000f, +0xfffffffffc00001f, +0xfffffffff800003f, +0xfffffffff000007f, +0xffffffffe00000ff, +0xffffffffc00001ff, +0xffffffff800003ff, +0xffffffff000007ff, +0xfffffffe00000fff, +0xfffffffc00001fff, +0xfffffff800003fff, +0xfffffff000007fff, +0xffffffe00000ffff, +0xffffffc00001ffff, +0xffffff800003ffff, +0xffffff000007ffff, +0xfffffe00000fffff, +0xfffffc00001fffff, +0xfffff800003fffff, +0xfffff000007fffff, +0xffffe00000ffffff, +0xffffc00001ffffff, +0xffff800003ffffff, +0xffff000007ffffff, +0xfffe00000fffffff, +0xfffc00001fffffff, +0xfff800003fffffff, +0xfff000007fffffff, +0xffe00000ffffffff, +0xffc00001ffffffff, +0xff800003ffffffff, +0xff000007ffffffff, +0xfe00000fffffffff, +0xfc00001fffffffff, +0xf800003fffffffff, +0xf000007fffffffff, +0xe00000ffffffffff, +0xc00001ffffffffff, +0x800003ffffffffff, +0x00000fffffffffff, +0x00001ffffffffffe, +0x00003ffffffffffc, +0x00007ffffffffff8, +0x0000fffffffffff0, +0x0001ffffffffffe0, +0x0003ffffffffffc0, +0x0007ffffffffff80, +0x000fffffffffff00, +0x001ffffffffffe00, +0x003ffffffffffc00, +0x007ffffffffff800, +0x00fffffffffff000, +0x01ffffffffffe000, +0x03ffffffffffc000, +0x07ffffffffff8000, +0x0fffffffffff0000, +0x1ffffffffffe0000, +0x3ffffffffffc0000, +0x7ffffffffff80000, +0xfffffffffff00000, +0xffffffffffe00001, +0xffffffffffc00003, +0xffffffffff800007, +0xffffffffff00000f, +0xfffffffffe00001f, +0xfffffffffc00003f, +0xfffffffff800007f, +0xfffffffff00000ff, +0xffffffffe00001ff, +0xffffffffc00003ff, +0xffffffff800007ff, +0xffffffff00000fff, +0xfffffffe00001fff, +0xfffffffc00003fff, +0xfffffff800007fff, +0xfffffff00000ffff, +0xffffffe00001ffff, +0xffffffc00003ffff, +0xffffff800007ffff, +0xffffff00000fffff, +0xfffffe00001fffff, +0xfffffc00003fffff, +0xfffff800007fffff, +0xfffff00000ffffff, +0xffffe00001ffffff, +0xffffc00003ffffff, +0xffff800007ffffff, +0xffff00000fffffff, +0xfffe00001fffffff, +0xfffc00003fffffff, +0xfff800007fffffff, +0xfff00000ffffffff, +0xffe00001ffffffff, +0xffc00003ffffffff, +0xff800007ffffffff, +0xff00000fffffffff, +0xfe00001fffffffff, +0xfc00003fffffffff, +0xf800007fffffffff, +0xf00000ffffffffff, +0xe00001ffffffffff, +0xc00003ffffffffff, +0x800007ffffffffff, +0x00001fffffffffff, +0x00003ffffffffffe, +0x00007ffffffffffc, 
+0x0000fffffffffff8, +0x0001fffffffffff0, +0x0003ffffffffffe0, +0x0007ffffffffffc0, +0x000fffffffffff80, +0x001fffffffffff00, +0x003ffffffffffe00, +0x007ffffffffffc00, +0x00fffffffffff800, +0x01fffffffffff000, +0x03ffffffffffe000, +0x07ffffffffffc000, +0x0fffffffffff8000, +0x1fffffffffff0000, +0x3ffffffffffe0000, +0x7ffffffffffc0000, +0xfffffffffff80000, +0xfffffffffff00001, +0xffffffffffe00003, +0xffffffffffc00007, +0xffffffffff80000f, +0xffffffffff00001f, +0xfffffffffe00003f, +0xfffffffffc00007f, +0xfffffffff80000ff, +0xfffffffff00001ff, +0xffffffffe00003ff, +0xffffffffc00007ff, +0xffffffff80000fff, +0xffffffff00001fff, +0xfffffffe00003fff, +0xfffffffc00007fff, +0xfffffff80000ffff, +0xfffffff00001ffff, +0xffffffe00003ffff, +0xffffffc00007ffff, +0xffffff80000fffff, +0xffffff00001fffff, +0xfffffe00003fffff, +0xfffffc00007fffff, +0xfffff80000ffffff, +0xfffff00001ffffff, +0xffffe00003ffffff, +0xffffc00007ffffff, +0xffff80000fffffff, +0xffff00001fffffff, +0xfffe00003fffffff, +0xfffc00007fffffff, +0xfff80000ffffffff, +0xfff00001ffffffff, +0xffe00003ffffffff, +0xffc00007ffffffff, +0xff80000fffffffff, +0xff00001fffffffff, +0xfe00003fffffffff, +0xfc00007fffffffff, +0xf80000ffffffffff, +0xf00001ffffffffff, +0xe00003ffffffffff, +0xc00007ffffffffff, +0x80000fffffffffff, +0x00003fffffffffff, +0x00007ffffffffffe, +0x0000fffffffffffc, +0x0001fffffffffff8, +0x0003fffffffffff0, +0x0007ffffffffffe0, +0x000fffffffffffc0, +0x001fffffffffff80, +0x003fffffffffff00, +0x007ffffffffffe00, +0x00fffffffffffc00, +0x01fffffffffff800, +0x03fffffffffff000, +0x07ffffffffffe000, +0x0fffffffffffc000, +0x1fffffffffff8000, +0x3fffffffffff0000, +0x7ffffffffffe0000, +0xfffffffffffc0000, +0xfffffffffff80001, +0xfffffffffff00003, +0xffffffffffe00007, +0xffffffffffc0000f, +0xffffffffff80001f, +0xffffffffff00003f, +0xfffffffffe00007f, +0xfffffffffc0000ff, +0xfffffffff80001ff, +0xfffffffff00003ff, +0xffffffffe00007ff, +0xffffffffc0000fff, +0xffffffff80001fff, +0xffffffff00003fff, +0xfffffffe00007fff, +0xfffffffc0000ffff, +0xfffffff80001ffff, +0xfffffff00003ffff, +0xffffffe00007ffff, +0xffffffc0000fffff, +0xffffff80001fffff, +0xffffff00003fffff, +0xfffffe00007fffff, +0xfffffc0000ffffff, +0xfffff80001ffffff, +0xfffff00003ffffff, +0xffffe00007ffffff, +0xffffc0000fffffff, +0xffff80001fffffff, +0xffff00003fffffff, +0xfffe00007fffffff, +0xfffc0000ffffffff, +0xfff80001ffffffff, +0xfff00003ffffffff, +0xffe00007ffffffff, +0xffc0000fffffffff, +0xff80001fffffffff, +0xff00003fffffffff, +0xfe00007fffffffff, +0xfc0000ffffffffff, +0xf80001ffffffffff, +0xf00003ffffffffff, +0xe00007ffffffffff, +0xc0000fffffffffff, +0x80001fffffffffff, +0x00007fffffffffff, +0x0000fffffffffffe, +0x0001fffffffffffc, +0x0003fffffffffff8, +0x0007fffffffffff0, +0x000fffffffffffe0, +0x001fffffffffffc0, +0x003fffffffffff80, +0x007fffffffffff00, +0x00fffffffffffe00, +0x01fffffffffffc00, +0x03fffffffffff800, +0x07fffffffffff000, +0x0fffffffffffe000, +0x1fffffffffffc000, +0x3fffffffffff8000, +0x7fffffffffff0000, +0xfffffffffffe0000, +0xfffffffffffc0001, +0xfffffffffff80003, +0xfffffffffff00007, +0xffffffffffe0000f, +0xffffffffffc0001f, +0xffffffffff80003f, +0xffffffffff00007f, +0xfffffffffe0000ff, +0xfffffffffc0001ff, +0xfffffffff80003ff, +0xfffffffff00007ff, +0xffffffffe0000fff, +0xffffffffc0001fff, +0xffffffff80003fff, +0xffffffff00007fff, +0xfffffffe0000ffff, +0xfffffffc0001ffff, +0xfffffff80003ffff, +0xfffffff00007ffff, +0xffffffe0000fffff, +0xffffffc0001fffff, +0xffffff80003fffff, +0xffffff00007fffff, +0xfffffe0000ffffff, +0xfffffc0001ffffff, +0xfffff80003ffffff, 
+0xfffff00007ffffff, +0xffffe0000fffffff, +0xffffc0001fffffff, +0xffff80003fffffff, +0xffff00007fffffff, +0xfffe0000ffffffff, +0xfffc0001ffffffff, +0xfff80003ffffffff, +0xfff00007ffffffff, +0xffe0000fffffffff, +0xffc0001fffffffff, +0xff80003fffffffff, +0xff00007fffffffff, +0xfe0000ffffffffff, +0xfc0001ffffffffff, +0xf80003ffffffffff, +0xf00007ffffffffff, +0xe0000fffffffffff, +0xc0001fffffffffff, +0x80003fffffffffff, +0x0000ffffffffffff, +0x0001fffffffffffe, +0x0003fffffffffffc, +0x0007fffffffffff8, +0x000ffffffffffff0, +0x001fffffffffffe0, +0x003fffffffffffc0, +0x007fffffffffff80, +0x00ffffffffffff00, +0x01fffffffffffe00, +0x03fffffffffffc00, +0x07fffffffffff800, +0x0ffffffffffff000, +0x1fffffffffffe000, +0x3fffffffffffc000, +0x7fffffffffff8000, +0xffffffffffff0000, +0xfffffffffffe0001, +0xfffffffffffc0003, +0xfffffffffff80007, +0xfffffffffff0000f, +0xffffffffffe0001f, +0xffffffffffc0003f, +0xffffffffff80007f, +0xffffffffff0000ff, +0xfffffffffe0001ff, +0xfffffffffc0003ff, +0xfffffffff80007ff, +0xfffffffff0000fff, +0xffffffffe0001fff, +0xffffffffc0003fff, +0xffffffff80007fff, +0xffffffff0000ffff, +0xfffffffe0001ffff, +0xfffffffc0003ffff, +0xfffffff80007ffff, +0xfffffff0000fffff, +0xffffffe0001fffff, +0xffffffc0003fffff, +0xffffff80007fffff, +0xffffff0000ffffff, +0xfffffe0001ffffff, +0xfffffc0003ffffff, +0xfffff80007ffffff, +0xfffff0000fffffff, +0xffffe0001fffffff, +0xffffc0003fffffff, +0xffff80007fffffff, +0xffff0000ffffffff, +0xfffe0001ffffffff, +0xfffc0003ffffffff, +0xfff80007ffffffff, +0xfff0000fffffffff, +0xffe0001fffffffff, +0xffc0003fffffffff, +0xff80007fffffffff, +0xff0000ffffffffff, +0xfe0001ffffffffff, +0xfc0003ffffffffff, +0xf80007ffffffffff, +0xf0000fffffffffff, +0xe0001fffffffffff, +0xc0003fffffffffff, +0x80007fffffffffff, +0x0001ffffffffffff, +0x0003fffffffffffe, +0x0007fffffffffffc, +0x000ffffffffffff8, +0x001ffffffffffff0, +0x003fffffffffffe0, +0x007fffffffffffc0, +0x00ffffffffffff80, +0x01ffffffffffff00, +0x03fffffffffffe00, +0x07fffffffffffc00, +0x0ffffffffffff800, +0x1ffffffffffff000, +0x3fffffffffffe000, +0x7fffffffffffc000, +0xffffffffffff8000, +0xffffffffffff0001, +0xfffffffffffe0003, +0xfffffffffffc0007, +0xfffffffffff8000f, +0xfffffffffff0001f, +0xffffffffffe0003f, +0xffffffffffc0007f, +0xffffffffff8000ff, +0xffffffffff0001ff, +0xfffffffffe0003ff, +0xfffffffffc0007ff, +0xfffffffff8000fff, +0xfffffffff0001fff, +0xffffffffe0003fff, +0xffffffffc0007fff, +0xffffffff8000ffff, +0xffffffff0001ffff, +0xfffffffe0003ffff, +0xfffffffc0007ffff, +0xfffffff8000fffff, +0xfffffff0001fffff, +0xffffffe0003fffff, +0xffffffc0007fffff, +0xffffff8000ffffff, +0xffffff0001ffffff, +0xfffffe0003ffffff, +0xfffffc0007ffffff, +0xfffff8000fffffff, +0xfffff0001fffffff, +0xffffe0003fffffff, +0xffffc0007fffffff, +0xffff8000ffffffff, +0xffff0001ffffffff, +0xfffe0003ffffffff, +0xfffc0007ffffffff, +0xfff8000fffffffff, +0xfff0001fffffffff, +0xffe0003fffffffff, +0xffc0007fffffffff, +0xff8000ffffffffff, +0xff0001ffffffffff, +0xfe0003ffffffffff, +0xfc0007ffffffffff, +0xf8000fffffffffff, +0xf0001fffffffffff, +0xe0003fffffffffff, +0xc0007fffffffffff, +0x8000ffffffffffff, +0x0003ffffffffffff, +0x0007fffffffffffe, +0x000ffffffffffffc, +0x001ffffffffffff8, +0x003ffffffffffff0, +0x007fffffffffffe0, +0x00ffffffffffffc0, +0x01ffffffffffff80, +0x03ffffffffffff00, +0x07fffffffffffe00, +0x0ffffffffffffc00, +0x1ffffffffffff800, +0x3ffffffffffff000, +0x7fffffffffffe000, +0xffffffffffffc000, +0xffffffffffff8001, +0xffffffffffff0003, +0xfffffffffffe0007, +0xfffffffffffc000f, +0xfffffffffff8001f, +0xfffffffffff0003f, 
+0xffffffffffe0007f, +0xffffffffffc000ff, +0xffffffffff8001ff, +0xffffffffff0003ff, +0xfffffffffe0007ff, +0xfffffffffc000fff, +0xfffffffff8001fff, +0xfffffffff0003fff, +0xffffffffe0007fff, +0xffffffffc000ffff, +0xffffffff8001ffff, +0xffffffff0003ffff, +0xfffffffe0007ffff, +0xfffffffc000fffff, +0xfffffff8001fffff, +0xfffffff0003fffff, +0xffffffe0007fffff, +0xffffffc000ffffff, +0xffffff8001ffffff, +0xffffff0003ffffff, +0xfffffe0007ffffff, +0xfffffc000fffffff, +0xfffff8001fffffff, +0xfffff0003fffffff, +0xffffe0007fffffff, +0xffffc000ffffffff, +0xffff8001ffffffff, +0xffff0003ffffffff, +0xfffe0007ffffffff, +0xfffc000fffffffff, +0xfff8001fffffffff, +0xfff0003fffffffff, +0xffe0007fffffffff, +0xffc000ffffffffff, +0xff8001ffffffffff, +0xff0003ffffffffff, +0xfe0007ffffffffff, +0xfc000fffffffffff, +0xf8001fffffffffff, +0xf0003fffffffffff, +0xe0007fffffffffff, +0xc000ffffffffffff, +0x8001ffffffffffff, +0x0007ffffffffffff, +0x000ffffffffffffe, +0x001ffffffffffffc, +0x003ffffffffffff8, +0x007ffffffffffff0, +0x00ffffffffffffe0, +0x01ffffffffffffc0, +0x03ffffffffffff80, +0x07ffffffffffff00, +0x0ffffffffffffe00, +0x1ffffffffffffc00, +0x3ffffffffffff800, +0x7ffffffffffff000, +0xffffffffffffe000, +0xffffffffffffc001, +0xffffffffffff8003, +0xffffffffffff0007, +0xfffffffffffe000f, +0xfffffffffffc001f, +0xfffffffffff8003f, +0xfffffffffff0007f, +0xffffffffffe000ff, +0xffffffffffc001ff, +0xffffffffff8003ff, +0xffffffffff0007ff, +0xfffffffffe000fff, +0xfffffffffc001fff, +0xfffffffff8003fff, +0xfffffffff0007fff, +0xffffffffe000ffff, +0xffffffffc001ffff, +0xffffffff8003ffff, +0xffffffff0007ffff, +0xfffffffe000fffff, +0xfffffffc001fffff, +0xfffffff8003fffff, +0xfffffff0007fffff, +0xffffffe000ffffff, +0xffffffc001ffffff, +0xffffff8003ffffff, +0xffffff0007ffffff, +0xfffffe000fffffff, +0xfffffc001fffffff, +0xfffff8003fffffff, +0xfffff0007fffffff, +0xffffe000ffffffff, +0xffffc001ffffffff, +0xffff8003ffffffff, +0xffff0007ffffffff, +0xfffe000fffffffff, +0xfffc001fffffffff, +0xfff8003fffffffff, +0xfff0007fffffffff, +0xffe000ffffffffff, +0xffc001ffffffffff, +0xff8003ffffffffff, +0xff0007ffffffffff, +0xfe000fffffffffff, +0xfc001fffffffffff, +0xf8003fffffffffff, +0xf0007fffffffffff, +0xe000ffffffffffff, +0xc001ffffffffffff, +0x8003ffffffffffff, +0x000fffffffffffff, +0x001ffffffffffffe, +0x003ffffffffffffc, +0x007ffffffffffff8, +0x00fffffffffffff0, +0x01ffffffffffffe0, +0x03ffffffffffffc0, +0x07ffffffffffff80, +0x0fffffffffffff00, +0x1ffffffffffffe00, +0x3ffffffffffffc00, +0x7ffffffffffff800, +0xfffffffffffff000, +0xffffffffffffe001, +0xffffffffffffc003, +0xffffffffffff8007, +0xffffffffffff000f, +0xfffffffffffe001f, +0xfffffffffffc003f, +0xfffffffffff8007f, +0xfffffffffff000ff, +0xffffffffffe001ff, +0xffffffffffc003ff, +0xffffffffff8007ff, +0xffffffffff000fff, +0xfffffffffe001fff, +0xfffffffffc003fff, +0xfffffffff8007fff, +0xfffffffff000ffff, +0xffffffffe001ffff, +0xffffffffc003ffff, +0xffffffff8007ffff, +0xffffffff000fffff, +0xfffffffe001fffff, +0xfffffffc003fffff, +0xfffffff8007fffff, +0xfffffff000ffffff, +0xffffffe001ffffff, +0xffffffc003ffffff, +0xffffff8007ffffff, +0xffffff000fffffff, +0xfffffe001fffffff, +0xfffffc003fffffff, +0xfffff8007fffffff, +0xfffff000ffffffff, +0xffffe001ffffffff, +0xffffc003ffffffff, +0xffff8007ffffffff, +0xffff000fffffffff, +0xfffe001fffffffff, +0xfffc003fffffffff, +0xfff8007fffffffff, +0xfff000ffffffffff, +0xffe001ffffffffff, +0xffc003ffffffffff, +0xff8007ffffffffff, +0xff000fffffffffff, +0xfe001fffffffffff, +0xfc003fffffffffff, +0xf8007fffffffffff, +0xf000ffffffffffff, +0xe001ffffffffffff, 
+0xc003ffffffffffff, +0x8007ffffffffffff, +0x001fffffffffffff, +0x003ffffffffffffe, +0x007ffffffffffffc, +0x00fffffffffffff8, +0x01fffffffffffff0, +0x03ffffffffffffe0, +0x07ffffffffffffc0, +0x0fffffffffffff80, +0x1fffffffffffff00, +0x3ffffffffffffe00, +0x7ffffffffffffc00, +0xfffffffffffff800, +0xfffffffffffff001, +0xffffffffffffe003, +0xffffffffffffc007, +0xffffffffffff800f, +0xffffffffffff001f, +0xfffffffffffe003f, +0xfffffffffffc007f, +0xfffffffffff800ff, +0xfffffffffff001ff, +0xffffffffffe003ff, +0xffffffffffc007ff, +0xffffffffff800fff, +0xffffffffff001fff, +0xfffffffffe003fff, +0xfffffffffc007fff, +0xfffffffff800ffff, +0xfffffffff001ffff, +0xffffffffe003ffff, +0xffffffffc007ffff, +0xffffffff800fffff, +0xffffffff001fffff, +0xfffffffe003fffff, +0xfffffffc007fffff, +0xfffffff800ffffff, +0xfffffff001ffffff, +0xffffffe003ffffff, +0xffffffc007ffffff, +0xffffff800fffffff, +0xffffff001fffffff, +0xfffffe003fffffff, +0xfffffc007fffffff, +0xfffff800ffffffff, +0xfffff001ffffffff, +0xffffe003ffffffff, +0xffffc007ffffffff, +0xffff800fffffffff, +0xffff001fffffffff, +0xfffe003fffffffff, +0xfffc007fffffffff, +0xfff800ffffffffff, +0xfff001ffffffffff, +0xffe003ffffffffff, +0xffc007ffffffffff, +0xff800fffffffffff, +0xff001fffffffffff, +0xfe003fffffffffff, +0xfc007fffffffffff, +0xf800ffffffffffff, +0xf001ffffffffffff, +0xe003ffffffffffff, +0xc007ffffffffffff, +0x800fffffffffffff, +0x003fffffffffffff, +0x007ffffffffffffe, +0x00fffffffffffffc, +0x01fffffffffffff8, +0x03fffffffffffff0, +0x07ffffffffffffe0, +0x0fffffffffffffc0, +0x1fffffffffffff80, +0x3fffffffffffff00, +0x7ffffffffffffe00, +0xfffffffffffffc00, +0xfffffffffffff801, +0xfffffffffffff003, +0xffffffffffffe007, +0xffffffffffffc00f, +0xffffffffffff801f, +0xffffffffffff003f, +0xfffffffffffe007f, +0xfffffffffffc00ff, +0xfffffffffff801ff, +0xfffffffffff003ff, +0xffffffffffe007ff, +0xffffffffffc00fff, +0xffffffffff801fff, +0xffffffffff003fff, +0xfffffffffe007fff, +0xfffffffffc00ffff, +0xfffffffff801ffff, +0xfffffffff003ffff, +0xffffffffe007ffff, +0xffffffffc00fffff, +0xffffffff801fffff, +0xffffffff003fffff, +0xfffffffe007fffff, +0xfffffffc00ffffff, +0xfffffff801ffffff, +0xfffffff003ffffff, +0xffffffe007ffffff, +0xffffffc00fffffff, +0xffffff801fffffff, +0xffffff003fffffff, +0xfffffe007fffffff, +0xfffffc00ffffffff, +0xfffff801ffffffff, +0xfffff003ffffffff, +0xffffe007ffffffff, +0xffffc00fffffffff, +0xffff801fffffffff, +0xffff003fffffffff, +0xfffe007fffffffff, +0xfffc00ffffffffff, +0xfff801ffffffffff, +0xfff003ffffffffff, +0xffe007ffffffffff, +0xffc00fffffffffff, +0xff801fffffffffff, +0xff003fffffffffff, +0xfe007fffffffffff, +0xfc00ffffffffffff, +0xf801ffffffffffff, +0xf003ffffffffffff, +0xe007ffffffffffff, +0xc00fffffffffffff, +0x801fffffffffffff, +0x007fffffffffffff, +0x00fffffffffffffe, +0x01fffffffffffffc, +0x03fffffffffffff8, +0x07fffffffffffff0, +0x0fffffffffffffe0, +0x1fffffffffffffc0, +0x3fffffffffffff80, +0x7fffffffffffff00, +0xfffffffffffffe00, +0xfffffffffffffc01, +0xfffffffffffff803, +0xfffffffffffff007, +0xffffffffffffe00f, +0xffffffffffffc01f, +0xffffffffffff803f, +0xffffffffffff007f, +0xfffffffffffe00ff, +0xfffffffffffc01ff, +0xfffffffffff803ff, +0xfffffffffff007ff, +0xffffffffffe00fff, +0xffffffffffc01fff, +0xffffffffff803fff, +0xffffffffff007fff, +0xfffffffffe00ffff, +0xfffffffffc01ffff, +0xfffffffff803ffff, +0xfffffffff007ffff, +0xffffffffe00fffff, +0xffffffffc01fffff, +0xffffffff803fffff, +0xffffffff007fffff, +0xfffffffe00ffffff, +0xfffffffc01ffffff, +0xfffffff803ffffff, +0xfffffff007ffffff, +0xffffffe00fffffff, +0xffffffc01fffffff, 
+0xffffff803fffffff, +0xffffff007fffffff, +0xfffffe00ffffffff, +0xfffffc01ffffffff, +0xfffff803ffffffff, +0xfffff007ffffffff, +0xffffe00fffffffff, +0xffffc01fffffffff, +0xffff803fffffffff, +0xffff007fffffffff, +0xfffe00ffffffffff, +0xfffc01ffffffffff, +0xfff803ffffffffff, +0xfff007ffffffffff, +0xffe00fffffffffff, +0xffc01fffffffffff, +0xff803fffffffffff, +0xff007fffffffffff, +0xfe00ffffffffffff, +0xfc01ffffffffffff, +0xf803ffffffffffff, +0xf007ffffffffffff, +0xe00fffffffffffff, +0xc01fffffffffffff, +0x803fffffffffffff, +0x00ffffffffffffff, +0x01fffffffffffffe, +0x03fffffffffffffc, +0x07fffffffffffff8, +0x0ffffffffffffff0, +0x1fffffffffffffe0, +0x3fffffffffffffc0, +0x7fffffffffffff80, +0xffffffffffffff00, +0xfffffffffffffe01, +0xfffffffffffffc03, +0xfffffffffffff807, +0xfffffffffffff00f, +0xffffffffffffe01f, +0xffffffffffffc03f, +0xffffffffffff807f, +0xffffffffffff00ff, +0xfffffffffffe01ff, +0xfffffffffffc03ff, +0xfffffffffff807ff, +0xfffffffffff00fff, +0xffffffffffe01fff, +0xffffffffffc03fff, +0xffffffffff807fff, +0xffffffffff00ffff, +0xfffffffffe01ffff, +0xfffffffffc03ffff, +0xfffffffff807ffff, +0xfffffffff00fffff, +0xffffffffe01fffff, +0xffffffffc03fffff, +0xffffffff807fffff, +0xffffffff00ffffff, +0xfffffffe01ffffff, +0xfffffffc03ffffff, +0xfffffff807ffffff, +0xfffffff00fffffff, +0xffffffe01fffffff, +0xffffffc03fffffff, +0xffffff807fffffff, +0xffffff00ffffffff, +0xfffffe01ffffffff, +0xfffffc03ffffffff, +0xfffff807ffffffff, +0xfffff00fffffffff, +0xffffe01fffffffff, +0xffffc03fffffffff, +0xffff807fffffffff, +0xffff00ffffffffff, +0xfffe01ffffffffff, +0xfffc03ffffffffff, +0xfff807ffffffffff, +0xfff00fffffffffff, +0xffe01fffffffffff, +0xffc03fffffffffff, +0xff807fffffffffff, +0xff00ffffffffffff, +0xfe01ffffffffffff, +0xfc03ffffffffffff, +0xf807ffffffffffff, +0xf00fffffffffffff, +0xe01fffffffffffff, +0xc03fffffffffffff, +0x807fffffffffffff, +0x01ffffffffffffff, +0x03fffffffffffffe, +0x07fffffffffffffc, +0x0ffffffffffffff8, +0x1ffffffffffffff0, +0x3fffffffffffffe0, +0x7fffffffffffffc0, +0xffffffffffffff80, +0xffffffffffffff01, +0xfffffffffffffe03, +0xfffffffffffffc07, +0xfffffffffffff80f, +0xfffffffffffff01f, +0xffffffffffffe03f, +0xffffffffffffc07f, +0xffffffffffff80ff, +0xffffffffffff01ff, +0xfffffffffffe03ff, +0xfffffffffffc07ff, +0xfffffffffff80fff, +0xfffffffffff01fff, +0xffffffffffe03fff, +0xffffffffffc07fff, +0xffffffffff80ffff, +0xffffffffff01ffff, +0xfffffffffe03ffff, +0xfffffffffc07ffff, +0xfffffffff80fffff, +0xfffffffff01fffff, +0xffffffffe03fffff, +0xffffffffc07fffff, +0xffffffff80ffffff, +0xffffffff01ffffff, +0xfffffffe03ffffff, +0xfffffffc07ffffff, +0xfffffff80fffffff, +0xfffffff01fffffff, +0xffffffe03fffffff, +0xffffffc07fffffff, +0xffffff80ffffffff, +0xffffff01ffffffff, +0xfffffe03ffffffff, +0xfffffc07ffffffff, +0xfffff80fffffffff, +0xfffff01fffffffff, +0xffffe03fffffffff, +0xffffc07fffffffff, +0xffff80ffffffffff, +0xffff01ffffffffff, +0xfffe03ffffffffff, +0xfffc07ffffffffff, +0xfff80fffffffffff, +0xfff01fffffffffff, +0xffe03fffffffffff, +0xffc07fffffffffff, +0xff80ffffffffffff, +0xff01ffffffffffff, +0xfe03ffffffffffff, +0xfc07ffffffffffff, +0xf80fffffffffffff, +0xf01fffffffffffff, +0xe03fffffffffffff, +0xc07fffffffffffff, +0x80ffffffffffffff, +0x03ffffffffffffff, +0x07fffffffffffffe, +0x0ffffffffffffffc, +0x1ffffffffffffff8, +0x3ffffffffffffff0, +0x7fffffffffffffe0, +0xffffffffffffffc0, +0xffffffffffffff81, +0xffffffffffffff03, +0xfffffffffffffe07, +0xfffffffffffffc0f, +0xfffffffffffff81f, +0xfffffffffffff03f, +0xffffffffffffe07f, +0xffffffffffffc0ff, +0xffffffffffff81ff, 
+0xffffffffffff03ff, +0xfffffffffffe07ff, +0xfffffffffffc0fff, +0xfffffffffff81fff, +0xfffffffffff03fff, +0xffffffffffe07fff, +0xffffffffffc0ffff, +0xffffffffff81ffff, +0xffffffffff03ffff, +0xfffffffffe07ffff, +0xfffffffffc0fffff, +0xfffffffff81fffff, +0xfffffffff03fffff, +0xffffffffe07fffff, +0xffffffffc0ffffff, +0xffffffff81ffffff, +0xffffffff03ffffff, +0xfffffffe07ffffff, +0xfffffffc0fffffff, +0xfffffff81fffffff, +0xfffffff03fffffff, +0xffffffe07fffffff, +0xffffffc0ffffffff, +0xffffff81ffffffff, +0xffffff03ffffffff, +0xfffffe07ffffffff, +0xfffffc0fffffffff, +0xfffff81fffffffff, +0xfffff03fffffffff, +0xffffe07fffffffff, +0xffffc0ffffffffff, +0xffff81ffffffffff, +0xffff03ffffffffff, +0xfffe07ffffffffff, +0xfffc0fffffffffff, +0xfff81fffffffffff, +0xfff03fffffffffff, +0xffe07fffffffffff, +0xffc0ffffffffffff, +0xff81ffffffffffff, +0xff03ffffffffffff, +0xfe07ffffffffffff, +0xfc0fffffffffffff, +0xf81fffffffffffff, +0xf03fffffffffffff, +0xe07fffffffffffff, +0xc0ffffffffffffff, +0x81ffffffffffffff, +0x07ffffffffffffff, +0x0ffffffffffffffe, +0x1ffffffffffffffc, +0x3ffffffffffffff8, +0x7ffffffffffffff0, +0xffffffffffffffe0, +0xffffffffffffffc1, +0xffffffffffffff83, +0xffffffffffffff07, +0xfffffffffffffe0f, +0xfffffffffffffc1f, +0xfffffffffffff83f, +0xfffffffffffff07f, +0xffffffffffffe0ff, +0xffffffffffffc1ff, +0xffffffffffff83ff, +0xffffffffffff07ff, +0xfffffffffffe0fff, +0xfffffffffffc1fff, +0xfffffffffff83fff, +0xfffffffffff07fff, +0xffffffffffe0ffff, +0xffffffffffc1ffff, +0xffffffffff83ffff, +0xffffffffff07ffff, +0xfffffffffe0fffff, +0xfffffffffc1fffff, +0xfffffffff83fffff, +0xfffffffff07fffff, +0xffffffffe0ffffff, +0xffffffffc1ffffff, +0xffffffff83ffffff, +0xffffffff07ffffff, +0xfffffffe0fffffff, +0xfffffffc1fffffff, +0xfffffff83fffffff, +0xfffffff07fffffff, +0xffffffe0ffffffff, +0xffffffc1ffffffff, +0xffffff83ffffffff, +0xffffff07ffffffff, +0xfffffe0fffffffff, +0xfffffc1fffffffff, +0xfffff83fffffffff, +0xfffff07fffffffff, +0xffffe0ffffffffff, +0xffffc1ffffffffff, +0xffff83ffffffffff, +0xffff07ffffffffff, +0xfffe0fffffffffff, +0xfffc1fffffffffff, +0xfff83fffffffffff, +0xfff07fffffffffff, +0xffe0ffffffffffff, +0xffc1ffffffffffff, +0xff83ffffffffffff, +0xff07ffffffffffff, +0xfe0fffffffffffff, +0xfc1fffffffffffff, +0xf83fffffffffffff, +0xf07fffffffffffff, +0xe0ffffffffffffff, +0xc1ffffffffffffff, +0x83ffffffffffffff, +0x0fffffffffffffff, +0x1ffffffffffffffe, +0x3ffffffffffffffc, +0x7ffffffffffffff8, +0xfffffffffffffff0, +0xffffffffffffffe1, +0xffffffffffffffc3, +0xffffffffffffff87, +0xffffffffffffff0f, +0xfffffffffffffe1f, +0xfffffffffffffc3f, +0xfffffffffffff87f, +0xfffffffffffff0ff, +0xffffffffffffe1ff, +0xffffffffffffc3ff, +0xffffffffffff87ff, +0xffffffffffff0fff, +0xfffffffffffe1fff, +0xfffffffffffc3fff, +0xfffffffffff87fff, +0xfffffffffff0ffff, +0xffffffffffe1ffff, +0xffffffffffc3ffff, +0xffffffffff87ffff, +0xffffffffff0fffff, +0xfffffffffe1fffff, +0xfffffffffc3fffff, +0xfffffffff87fffff, +0xfffffffff0ffffff, +0xffffffffe1ffffff, +0xffffffffc3ffffff, +0xffffffff87ffffff, +0xffffffff0fffffff, +0xfffffffe1fffffff, +0xfffffffc3fffffff, +0xfffffff87fffffff, +0xfffffff0ffffffff, +0xffffffe1ffffffff, +0xffffffc3ffffffff, +0xffffff87ffffffff, +0xffffff0fffffffff, +0xfffffe1fffffffff, +0xfffffc3fffffffff, +0xfffff87fffffffff, +0xfffff0ffffffffff, +0xffffe1ffffffffff, +0xffffc3ffffffffff, +0xffff87ffffffffff, +0xffff0fffffffffff, +0xfffe1fffffffffff, +0xfffc3fffffffffff, +0xfff87fffffffffff, +0xfff0ffffffffffff, +0xffe1ffffffffffff, +0xffc3ffffffffffff, +0xff87ffffffffffff, +0xff0fffffffffffff, 
+0xfe1fffffffffffff, +0xfc3fffffffffffff, +0xf87fffffffffffff, +0xf0ffffffffffffff, +0xe1ffffffffffffff, +0xc3ffffffffffffff, +0x87ffffffffffffff, +0x1fffffffffffffff, +0x3ffffffffffffffe, +0x7ffffffffffffffc, +0xfffffffffffffff8, +0xfffffffffffffff1, +0xffffffffffffffe3, +0xffffffffffffffc7, +0xffffffffffffff8f, +0xffffffffffffff1f, +0xfffffffffffffe3f, +0xfffffffffffffc7f, +0xfffffffffffff8ff, +0xfffffffffffff1ff, +0xffffffffffffe3ff, +0xffffffffffffc7ff, +0xffffffffffff8fff, +0xffffffffffff1fff, +0xfffffffffffe3fff, +0xfffffffffffc7fff, +0xfffffffffff8ffff, +0xfffffffffff1ffff, +0xffffffffffe3ffff, +0xffffffffffc7ffff, +0xffffffffff8fffff, +0xffffffffff1fffff, +0xfffffffffe3fffff, +0xfffffffffc7fffff, +0xfffffffff8ffffff, +0xfffffffff1ffffff, +0xffffffffe3ffffff, +0xffffffffc7ffffff, +0xffffffff8fffffff, +0xffffffff1fffffff, +0xfffffffe3fffffff, +0xfffffffc7fffffff, +0xfffffff8ffffffff, +0xfffffff1ffffffff, +0xffffffe3ffffffff, +0xffffffc7ffffffff, +0xffffff8fffffffff, +0xffffff1fffffffff, +0xfffffe3fffffffff, +0xfffffc7fffffffff, +0xfffff8ffffffffff, +0xfffff1ffffffffff, +0xffffe3ffffffffff, +0xffffc7ffffffffff, +0xffff8fffffffffff, +0xffff1fffffffffff, +0xfffe3fffffffffff, +0xfffc7fffffffffff, +0xfff8ffffffffffff, +0xfff1ffffffffffff, +0xffe3ffffffffffff, +0xffc7ffffffffffff, +0xff8fffffffffffff, +0xff1fffffffffffff, +0xfe3fffffffffffff, +0xfc7fffffffffffff, +0xf8ffffffffffffff, +0xf1ffffffffffffff, +0xe3ffffffffffffff, +0xc7ffffffffffffff, +0x8fffffffffffffff, +0x3fffffffffffffff, +0x7ffffffffffffffe, +0xfffffffffffffffc, +0xfffffffffffffff9, +0xfffffffffffffff3, +0xffffffffffffffe7, +0xffffffffffffffcf, +0xffffffffffffff9f, +0xffffffffffffff3f, +0xfffffffffffffe7f, +0xfffffffffffffcff, +0xfffffffffffff9ff, +0xfffffffffffff3ff, +0xffffffffffffe7ff, +0xffffffffffffcfff, +0xffffffffffff9fff, +0xffffffffffff3fff, +0xfffffffffffe7fff, +0xfffffffffffcffff, +0xfffffffffff9ffff, +0xfffffffffff3ffff, +0xffffffffffe7ffff, +0xffffffffffcfffff, +0xffffffffff9fffff, +0xffffffffff3fffff, +0xfffffffffe7fffff, +0xfffffffffcffffff, +0xfffffffff9ffffff, +0xfffffffff3ffffff, +0xffffffffe7ffffff, +0xffffffffcfffffff, +0xffffffff9fffffff, +0xffffffff3fffffff, +0xfffffffe7fffffff, +0xfffffffcffffffff, +0xfffffff9ffffffff, +0xfffffff3ffffffff, +0xffffffe7ffffffff, +0xffffffcfffffffff, +0xffffff9fffffffff, +0xffffff3fffffffff, +0xfffffe7fffffffff, +0xfffffcffffffffff, +0xfffff9ffffffffff, +0xfffff3ffffffffff, +0xffffe7ffffffffff, +0xffffcfffffffffff, +0xffff9fffffffffff, +0xffff3fffffffffff, +0xfffe7fffffffffff, +0xfffcffffffffffff, +0xfff9ffffffffffff, +0xfff3ffffffffffff, +0xffe7ffffffffffff, +0xffcfffffffffffff, +0xff9fffffffffffff, +0xff3fffffffffffff, +0xfe7fffffffffffff, +0xfcffffffffffffff, +0xf9ffffffffffffff, +0xf3ffffffffffffff, +0xe7ffffffffffffff, +0xcfffffffffffffff, +0x9fffffffffffffff, +0x7fffffffffffffff, +0xfffffffffffffffe, +0xfffffffffffffffd, +0xfffffffffffffffb, +0xfffffffffffffff7, +0xffffffffffffffef, +0xffffffffffffffdf, +0xffffffffffffffbf, +0xffffffffffffff7f, +0xfffffffffffffeff, +0xfffffffffffffdff, +0xfffffffffffffbff, +0xfffffffffffff7ff, +0xffffffffffffefff, +0xffffffffffffdfff, +0xffffffffffffbfff, +0xffffffffffff7fff, +0xfffffffffffeffff, +0xfffffffffffdffff, +0xfffffffffffbffff, +0xfffffffffff7ffff, +0xffffffffffefffff, +0xffffffffffdfffff, +0xffffffffffbfffff, +0xffffffffff7fffff, +0xfffffffffeffffff, +0xfffffffffdffffff, +0xfffffffffbffffff, +0xfffffffff7ffffff, +0xffffffffefffffff, +0xffffffffdfffffff, +0xffffffffbfffffff, +0xffffffff7fffffff, +0xfffffffeffffffff, 
+0xfffffffdffffffff, +0xfffffffbffffffff, +0xfffffff7ffffffff, +0xffffffefffffffff, +0xffffffdfffffffff, +0xffffffbfffffffff, +0xffffff7fffffffff, +0xfffffeffffffffff, +0xfffffdffffffffff, +0xfffffbffffffffff, +0xfffff7ffffffffff, +0xffffefffffffffff, +0xffffdfffffffffff, +0xffffbfffffffffff, +0xffff7fffffffffff, +0xfffeffffffffffff, +0xfffdffffffffffff, +0xfffbffffffffffff, +0xfff7ffffffffffff, +0xffefffffffffffff, +0xffdfffffffffffff, +0xffbfffffffffffff, +0xff7fffffffffffff, +0xfeffffffffffffff, +0xfdffffffffffffff, +0xfbffffffffffffff, +0xf7ffffffffffffff, +0xefffffffffffffff, +0xdfffffffffffffff, +0xbfffffffffffffff, +/* +#include <stdio.h> +#include <stdint.h> + +// Dumps all legal bitmask immediates for ARM64 +// Total number of unique 64-bit patterns: +// 1*2 + 3*4 + 7*8 + 15*16 + 31*32 + 63*64 = 5334 + +const char *uint64_to_binary(uint64_t x) { + static char b[65]; + unsigned i; + for (i = 0; i < 64; i++, x <<= 1) + b[i] = (0x8000000000000000ULL & x)? '1' : '0'; + b[64] = '\0'; + return b; +} + +int main() { + uint64_t result; + unsigned size, length, rotation, e; + for (size = 2; size <= 64; size *= 2) + for (length = 1; length < size; ++length) { + result = 0xffffffffffffffffULL >> (64 - length); + for (e = size; e < 64; e *= 2) + result |= result << e; + for (rotation = 0; rotation < size; ++rotation) { +#if 0 + printf("0x%016llx %s (size=%u, length=%u, rotation=%u)\n", + (unsigned long long)result, uint64_to_binary(result), + size, length, rotation); +#endif + printf("0x%016llx\n", (unsigned long long)result ); + result = (result >> 63) | (result << 1); + } + } + return 0; +} +*/ diff --git a/src/mapleall/maple_be/src/cg/schedule.cpp b/src/mapleall/maple_be/src/cg/schedule.cpp index f94215fbe46664ffcc88f461fa04c682ee409467..5076266cff1535555d94e24417d6539aa9531c6c 100644 --- a/src/mapleall/maple_be/src/cg/schedule.cpp +++ b/src/mapleall/maple_be/src/cg/schedule.cpp @@ -14,6 +14,8 @@ */ #if TARGAARCH64 #include "aarch64_schedule.h" +#elif TARGRISCV64 +#include "riscv64_schedule.h" #endif #if TARGARM32 #include "arm32_schedule.h" diff --git a/src/mapleall/maple_be/src/cg/strldr.cpp b/src/mapleall/maple_be/src/cg/strldr.cpp index b73819e74e4c3499936afa011cd9e43544c9949f..f5f37357a54a9bdf0e0fb786121ef1ca8deabcd0 100644 --- a/src/mapleall/maple_be/src/cg/strldr.cpp +++ b/src/mapleall/maple_be/src/cg/strldr.cpp @@ -14,6 +14,8 @@ */ #if TARGAARCH64 #include "aarch64_strldr.h" +#elif TARGRISCV64 +#include "riscv64_strldr.h" #endif #if TARGARM32 #include "arm32_strldr.h" diff --git a/src/mapleall/maple_be/src/cg/yieldpoint.cpp b/src/mapleall/maple_be/src/cg/yieldpoint.cpp index 8ba932145e99f7f483470ad73b142c54c3fc4444..e1105b7e5f5663d26d9f8fdb0af929f54d9f6df8 100644 --- a/src/mapleall/maple_be/src/cg/yieldpoint.cpp +++ b/src/mapleall/maple_be/src/cg/yieldpoint.cpp @@ -15,6 +15,8 @@ #include "yieldpoint.h" #if TARGAARCH64 #include "aarch64_yieldpoint.h" +#elif TARGRISCV64 +#include "riscv64_yieldpoint.h" #endif #if TARGARM32 #include "arm32_yieldpoint.h"