diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 3d6f5e6f9d3dae03274317937162125faa537f71..2bbdfcae6ae8fed17ad69c324adc53804899f10f 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -640,6 +640,11 @@ option(LLVM_BUILD_RUNTIME "Build the LLVM runtime libraries." ON)
 option(LLVM_BUILD_EXAMPLES
   "Build the LLVM example programs. If OFF, just generate build targets." OFF)
+option(BUILD_ARK_GC_SUPPORT
+  "Enable Ark GC support. If ON, ARK_GC_SUPPORT is defined." OFF)
+if(BUILD_ARK_GC_SUPPORT)
+  add_definitions(-DARK_GC_SUPPORT)
+endif(BUILD_ARK_GC_SUPPORT)
 option(LLVM_INCLUDE_EXAMPLES "Generate build targets for the LLVM examples" ON)

 if(LLVM_BUILD_EXAMPLES)
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index bb9e872b6ec5ca385c659998d363c63a789753ae..dc54d265f4907ee2bfff94a185e97636db45f951 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -4034,6 +4034,12 @@ LLVM_ATTRIBUTE_C_DEPRECATED(
 LLVMValueRef LLVMBuildCall2(LLVMBuilderRef, LLVMTypeRef, LLVMValueRef Fn,
                             LLVMValueRef *Args, unsigned NumArgs,
                             const char *Name);
+#ifdef ARK_GC_SUPPORT
+LLVMValueRef LLVMBuildCall3(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn,
+                            LLVMValueRef *Args, unsigned NumArgs,
+                            const char *Name, LLVMValueRef *deoptVals,
+                            int NumVals);
+#endif
 LLVMValueRef LLVMBuildSelect(LLVMBuilderRef, LLVMValueRef If,
                              LLVMValueRef Then, LLVMValueRef Else,
                              const char *Name);
diff --git a/llvm/include/llvm-c/ExecutionEngine.h b/llvm/include/llvm-c/ExecutionEngine.h
index c5fc9bdb4d07f62462c65924e6ae8faf75748dec..ccd4e5164165ef040e3ff07957d79fa2d2877ce9 100644
--- a/llvm/include/llvm-c/ExecutionEngine.h
+++ b/llvm/include/llvm-c/ExecutionEngine.h
@@ -42,6 +42,9 @@ typedef struct LLVMOpaqueMCJITMemoryManager *LLVMMCJITMemoryManagerRef;

 struct LLVMMCJITCompilerOptions {
   unsigned OptLevel;
+#ifdef ARK_GC_SUPPORT
+  LLVMRelocMode RelMode;
+#endif
   LLVMCodeModel CodeModel;
   LLVMBool NoFramePointerElim;
   LLVMBool EnableFastISel;
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 6cd595e87e6445c021a97d638a14965f8ec72be7..2e96fcd0415dce5d7ed5bf63c7e01f96bc15a18b 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -18,6 +18,9 @@
 #include "llvm/Support/TypeSize.h"
 #include "llvm/IR/CallingConv.h" // OHOS_LOCAL
 #include <vector>
+#ifdef ARK_GC_SUPPORT
+#include "llvm/ADT/Triple.h"
+#endif

 namespace llvm {
   class BitVector;
@@ -222,6 +225,26 @@ public:
   /// emitZeroCallUsedRegs - Zeros out call used registers.
   virtual void emitZeroCallUsedRegs(BitVector RegsToZero,
                                     MachineBasicBlock &MBB) const {}
+  #ifdef ARK_GC_SUPPORT
+  template <class T>
+  constexpr T RoundUp(T x, size_t n) const
+  {
+    static_assert(std::is_integral<T>::value, "T must be integral");
+    return (static_cast<size_t>(x) + n - 1U) & (-n);
+  }
+
+  virtual Triple::ArchType GetArkSupportTarget() const
+  {
+    return Triple::UnknownArch;
+  }
+
+  virtual int GetFixedFpPosition() const
+  {
+    return 2;
+  }
+
+  virtual int GetFrameReserveSize(MachineFunction &MF) const;
+  #endif

   /// OHOS_LOCAL begin
   /// Instances about backward cfi and stack protection provided by different architectures.
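Reviewer note (illustrative only, not part of the patch): the snippet below sketches how a client of the C API could use the new LLVMBuildCall3 entry point declared above to attach deoptimization state to a call as a "deopt" operand bundle, assuming headers built with ARK_GC_SUPPORT. Module, function, and value names ("ark_demo", "runtime_call", "caller", the constant 42) are placeholders.

#include "llvm-c/Core.h"

int main(void) {
  LLVMContextRef Ctx = LLVMContextCreate();
  LLVMModuleRef Mod = LLVMModuleCreateWithNameInContext("ark_demo", Ctx);
  LLVMBuilderRef Builder = LLVMCreateBuilderInContext(Ctx);

  /* i64 (i64) -- stands in for a runtime call that may trigger deoptimization. */
  LLVMTypeRef I64 = LLVMInt64TypeInContext(Ctx);
  LLVMTypeRef FnTy = LLVMFunctionType(I64, &I64, 1, 0);
  LLVMValueRef Callee = LLVMAddFunction(Mod, "runtime_call", FnTy);

  LLVMValueRef Caller = LLVMAddFunction(Mod, "caller", FnTy);
  LLVMBasicBlockRef Entry = LLVMAppendBasicBlockInContext(Ctx, Caller, "entry");
  LLVMPositionBuilderAtEnd(Builder, Entry);

  LLVMValueRef Arg = LLVMGetParam(Caller, 0);
#ifdef ARK_GC_SUPPORT
  /* Values the GC/deoptimizer must be able to reconstruct at this call site;
     they end up in a "deopt" operand bundle on the emitted call. */
  LLVMValueRef DeoptVals[1] = {LLVMConstInt(I64, 42, 0)};
  LLVMValueRef Call =
      LLVMBuildCall3(Builder, FnTy, Callee, &Arg, 1, "call", DeoptVals, 1);
#else
  LLVMValueRef Call = LLVMBuildCall2(Builder, FnTy, Callee, &Arg, 1, "call");
#endif
  LLVMBuildRet(Builder, Call);

  LLVMDumpModule(Mod); /* the call prints roughly:
                          %call = call i64 @runtime_call(i64 %0) [ "deopt"(i64 42) ] */
  LLVMDisposeBuilder(Builder);
  LLVMDisposeModule(Mod);
  LLVMContextDispose(Ctx);
  return 0;
}

The bundle is what the statepoint/stackmap machinery later in this patch consumes; without ARK_GC_SUPPORT the same code falls back to the stock LLVMBuildCall2.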
diff --git a/llvm/include/llvm/IR/LegacyPassManager.h b/llvm/include/llvm/IR/LegacyPassManager.h index b3a4820ba0e492b10bf3751688212ef1f7d155b8..2d82c2561cebbbaaf4252758fa261a5333fbb957 100644 --- a/llvm/include/llvm/IR/LegacyPassManager.h +++ b/llvm/include/llvm/IR/LegacyPassManager.h @@ -16,6 +16,9 @@ #ifndef LLVM_IR_LEGACYPASSMANAGER_H #define LLVM_IR_LEGACYPASSMANAGER_H +#ifdef ARK_GC_SUPPORT +#include "llvm/Pass.h" +#endif #include "llvm/Support/CBindingWrapping.h" namespace llvm { diff --git a/llvm/include/llvm/Target/CodeGenCWrappers.h b/llvm/include/llvm/Target/CodeGenCWrappers.h index a995463570535d04ccb0c378639c076760b88c73..5929c7efe2126d35ce3b07117042c4745e5d96cb 100644 --- a/llvm/include/llvm/Target/CodeGenCWrappers.h +++ b/llvm/include/llvm/Target/CodeGenCWrappers.h @@ -59,6 +59,37 @@ inline LLVMCodeModel wrap(CodeModel::Model Model) { } llvm_unreachable("Bad CodeModel!"); } + +#ifdef ARK_GC_SUPPORT +inline Reloc::Model unwrap(LLVMRelocMode Model) { + switch (Model) { + case LLVMRelocDefault: + case LLVMRelocStatic: + return Reloc::Static; + case LLVMRelocPIC: + return Reloc::PIC_; + case LLVMRelocDynamicNoPic: + return Reloc::DynamicNoPIC; + } + llvm_unreachable("Invalid LLVMRelocMode!"); +} + +inline LLVMRelocMode unwrap(Reloc::Model Model) { + switch (Model) { + case Reloc::Static: + return LLVMRelocStatic; + case Reloc::PIC_: + return LLVMRelocPIC; + case Reloc::DynamicNoPIC: + return LLVMRelocDynamicNoPic; + case Reloc::ROPI: + case Reloc::RWPI: + case Reloc::ROPI_RWPI: + break; + } + llvm_unreachable("Invalid Reloc::Model!"); +} +#endif } // namespace llvm #endif diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 2cac1b55c3dda0af1bf6e7f3cd8fd8dc8aceb73d..5dbcd0facfd15df3f93b5923e1f015013c1c1c5f 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -68,6 +68,11 @@ #include #include +#ifdef ARK_GC_SUPPORT +#include +#include +#endif + using namespace llvm; #define DEBUG_TYPE "prologepilog" @@ -121,6 +126,9 @@ private: void calculateCallFrameInfo(MachineFunction &MF); void calculateSaveRestoreBlocks(MachineFunction &MF); +#ifdef ARK_GC_SUPPORT + void RecordCalleeSaveRegisterAndOffset(MachineFunction &MF, const std::vector &CSI); +#endif void spillCalleeSavedRegs(MachineFunction &MF); void calculateFrameObjectOffsets(MachineFunction &MF); @@ -314,6 +322,10 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { RestoreBlocks.clear(); MFI.setSavePoint(nullptr); MFI.setRestorePoint(nullptr); +#ifdef ARK_GC_SUPPORT + std::vector &CSI = MFI.getCalleeSavedInfo(); + RecordCalleeSaveRegisterAndOffset(MF, CSI); +#endif return true; } @@ -649,6 +661,69 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, } } +#ifdef ARK_GC_SUPPORT +void PEI::RecordCalleeSaveRegisterAndOffset(MachineFunction &MF, const std::vector &CSI) +{ + MachineModuleInfo &MMI = MF.getMMI(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); + Function &func = const_cast(MF.getFunction()); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + Triple::ArchType archType = TFI->GetArkSupportTarget(); + + if ((archType != Triple::aarch64 && archType != Triple::x86_64) || !(TFI->hasFP(MF))) { + return; + } + unsigned FpRegDwarfNum = 0; + if (archType == Triple::aarch64) { + FpRegDwarfNum = 29; // x29 + } else { + FpRegDwarfNum = 6; //rbp + } + int64_t FpOffset = 0; + int64_t deleta; + // nearest to rbp callee register + 
int64_t maxOffset = INT_MIN; + for (auto I : CSI) { + int64_t Offset = MFI.getObjectOffset(I.getFrameIdx()); + unsigned Reg = I.getReg(); + unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, true); + if (FpRegDwarfNum == DwarfRegNum) { + FpOffset = Offset; + } + maxOffset = std::max(Offset, maxOffset); + } + if (archType == Triple::x86_64) { + // rbp not existed in CSI + int64_t reseversize = TFI->GetFrameReserveSize(MF) + sizeof(uint64_t); // 1: rbp + deleta = maxOffset + reseversize; // nearest to rbp offset + } else { + deleta = FpOffset; + } + + const unsigned LinkRegDwarfNum = 30; + for (std::vector::const_iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); + unsigned Reg = I->getReg(); + unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, true); + if ((DwarfRegNum == LinkRegDwarfNum || DwarfRegNum == FpRegDwarfNum) + && (archType == Triple::aarch64)) { + continue; + } + Offset = Offset - deleta; + std::string key = std::string("DwarfReg") + std::to_string(DwarfRegNum); + std::string value = std::to_string(Offset); + LLVM_DEBUG(dbgs() << "RecordCalleeSaveRegisterAndOffset DwarfRegNum :" + << DwarfRegNum << " key:" << key + << " value:" << value + << "]\n"); + Attribute attr = Attribute::get(func.getContext(), key.c_str(), value.c_str()); + func.addAttributeAtIndex(AttributeList::FunctionIndex, attr); + } +} +#endif + void PEI::spillCalleeSavedRegs(MachineFunction &MF) { // We can't list this requirement in getRequiredProperties because some // targets (WebAssembly) use virtual registers past this point, and the pass @@ -937,6 +1012,88 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // stack area. int64_t FixedCSEnd = Offset; +#ifdef ARK_GC_SUPPORT + int CalleeSavedFrameSize = 0; + Triple::ArchType archType = TFI.GetArkSupportTarget(); + if (archType == Triple::aarch64 && TFI.hasFP(MF)) { + int fpPosition = TFI.GetFixedFpPosition(); + int slotSize = sizeof(uint64_t); + int fpToCallerSpDelta = 0; + // 0:not exist +:count from head -:count from tail + // for x86-64 + // +--------------------------+ + // | caller Frame | + // +--------------------------+--- + // | returnAddr | ^ + // +--------------------------+ 2 slot(fpToCallerSpDelta) + // | Fp | V fpPosition = 2 + // +--------------------------+--- + // | type | + // +--------------------------+ + // | ReServeSize | + // +--------------------------+ + // | R14 | + // +--------------------------+ + // | R13 | + // +--------------------------+ + // | R12 | + // +--------------------------+ + // | RBX | + // +--------------------------+ + // for ARM64 + // +--------------------------+ + // | caller Frame | + // +--------------------------+--- + // | callee save registers | ^ + // | (exclude Fp) | | + // | | callee save registers size(fpToCallerSpDelta) + // +--------------------------+ | + // | Fp | V fpPosition = -1 + // +--------------------------+--- FixedCSEnd + // | type | + // +--------------------------+ + // | ReServeSize | + // +--------------------------+ + if (fpPosition >= 0) { + fpToCallerSpDelta = fpPosition * slotSize; + } else { + fpToCallerSpDelta = FixedCSEnd + (fpPosition + 1) * slotSize; + } + Function &func = const_cast(MF.getFunction()); + Attribute attr = Attribute::get(func.getContext(), "fpToCallerSpDelta", std::to_string(fpToCallerSpDelta).c_str()); + func.addAttributeAtIndex(AttributeList::FunctionIndex, attr); + + CalleeSavedFrameSize = TFI.GetFrameReserveSize(MF); + Offset += CalleeSavedFrameSize; + } + + if ((archType == Triple::x86_64) 
&& TFI.hasFP(MF)) { + // Determine which of the registers in the callee save list should be saved. + int fpPosition = TFI.GetFixedFpPosition(); + int fpToCallerSpDelta = 0; + int slotSize = sizeof(uint64_t); + if (fpPosition >= 0) { + fpToCallerSpDelta = fpPosition * slotSize; + } else { + fpToCallerSpDelta = FixedCSEnd + (fpPosition + 1) * slotSize; + } + Function &func = const_cast(MF.getFunction()); + Attribute attr = Attribute::get(func.getContext(), "fpToCallerSpDelta", std::to_string(fpToCallerSpDelta).c_str()); + func.addAttributeAtIndex(AttributeList::FunctionIndex, attr); + + CalleeSavedFrameSize = TFI.GetFrameReserveSize(MF); + std::vector &CSI = MFI.getCalleeSavedInfo(); + LLVM_DEBUG(dbgs() << " CSI size: " << CSI.size() << " CalleeSavedFrameSize " << CalleeSavedFrameSize << "\n"); + // if callee-saved is empty, the reserved-size can't be passed to the computation of local zone + // because the assignCalleeSavedSpillSlots() directly return. + // Otherwise, the reserved-size don't need to add to the computation of local zone because it has been considered + // while computing the offsets of callee-saved-zone that will be passed to the computation of local-zone + if (CSI.empty()) { + Offset += CalleeSavedFrameSize; + } + } +#endif + // Make sure the special register scavenging spill slot is closest to the // incoming stack pointer if a frame pointer is required and is closer // to the incoming rather than the final stack pointer. diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp index ccaff862fa3f3852e372c1c8f661293e226ee828..9254f3af5add64cde34d188804ddda4e575589f9 100644 --- a/llvm/lib/CodeGen/StackMaps.cpp +++ b/llvm/lib/CodeGen/StackMaps.cpp @@ -29,6 +29,9 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#ifdef ARK_GC_SUPPORT +#include "llvm/Target/TargetMachine.h" +#endif #include #include #include @@ -599,10 +602,11 @@ void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) { // Function Frame records. 
  LLVM_DEBUG(dbgs() << WSMP << "functions:\n");
  for (auto const &FR : FnInfos) {
-    LLVM_DEBUG(dbgs() << WSMP << "function addr: " << FR.first
-                      << " frame size: " << FR.second.StackSize
-                      << " callsite count: " << FR.second.RecordCount << '\n');
-    OS.emitSymbolValue(FR.first, 8);
+  #ifdef ARK_GC_SUPPORT
+    OS.emitSymbolValue(FR.first, AP.TM.getProgramPointerSize());
+  #else
+    OS.emitSymbolValue(FR.first, 8);
+  #endif
     OS.emitIntValue(FR.second.StackSize, 8);
     OS.emitIntValue(FR.second.RecordCount, 8);
   }
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 0007c44b859ca2c94c3ac8c79b8bcb723e71e890..0e026bdf42f12e902b26ca7c0a53d4a9bf7a3d8e 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -179,3 +179,15 @@ TargetFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
   return DwarfFrameBase{DwarfFrameBase::Register, {RI->getFrameRegister(MF)}};
 }
+
+#ifdef ARK_GC_SUPPORT
+int TargetFrameLowering::GetFrameReserveSize(MachineFunction &MF) const
+{
+  int64_t reserveSize = 0;
+  MF.getFunction()
+      .getFnAttribute("frame-reserved-slots")
+      .getValueAsString()
+      .getAsInteger(10, reserveSize);
+  return reserveSize;
+}
+#endif
diff --git a/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 672fd7b991c25a6d144566324d485d156942a748..b4bbadc7f5e50a100b9168fe9d55619748a3f444 100644
--- a/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -197,6 +197,9 @@ LLVMBool LLVMCreateMCJITCompilerForModule(
   builder.setEngineKind(EngineKind::JIT)
          .setErrorStr(&Error)
          .setOptLevel((CodeGenOpt::Level)options.OptLevel)
+#ifdef ARK_GC_SUPPORT
+         .setRelocationModel(unwrap(options.RelMode))
+#endif
          .setTargetOptions(targetOptions);
   bool JIT;
   if (Optional<CodeModel::Model> CM = unwrap(options.CodeModel, JIT))
diff --git a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
index f1eeee3b3599d236c90d1783813f4eca3d78dfc1..8ed1b504a029ca9dc661c0de96db9a2999b7ff90 100644
--- a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
+++ b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
@@ -54,7 +54,22 @@ extern "C" {
 }

 namespace {
-
+#ifdef ARK_GC_SUPPORT
+// We put information about the JITed function in this global, which the
+// debugger reads. Make sure to specify the version statically, because the
+// debugger checks the version before we can set it during runtime.
+struct jit_descriptor __jit_debug_descriptor = {1, 0, nullptr, nullptr};
+
+// Debuggers that implement the GDB JIT interface put a special breakpoint in
+// this function.
+LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() {
+  // The noinline and the asm prevent calls to this function from being
+  // optimized out.
+#if !defined(_MSC_VER)
+  asm volatile("" ::: "memory");
+#endif
+}
+#endif
 // FIXME: lli aims to provide both, RuntimeDyld and JITLink, as the dynamic
 // loaders for it's JIT implementations. And they both offer debugging via the
 // GDB JIT interface, which builds on the two well-known symbol names below.
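Reviewer note (illustrative only, not part of the patch): GetFrameReserveSize above reads the reserve size from the "frame-reserved-slots" string attribute, and the PrologEpilogInserter changes publish their results through the same mechanism ("fpToCallerSpDelta" and per-register "DwarfReg<N>" offsets). The sketch below shows that producer/consumer convention on a plain IR function using the regular C++ API; the function name "compiled_method" and the value 16 are placeholders.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("ark_attr_demo", Ctx);

  FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "compiled_method", &M);

  // Producer side (e.g. the Ark front end): request a reserve of 16 bytes
  // (placeholder value) by attaching the string attribute the hook parses.
  F->addFnAttr("frame-reserved-slots", "16");

  // Consumer side: this mirrors what TargetFrameLowering::GetFrameReserveSize does.
  int64_t ReserveSize = 0;
  F->getFnAttribute("frame-reserved-slots")
      .getValueAsString()
      .getAsInteger(10, ReserveSize);

  // After PEI runs, callee-save offsets would appear as "DwarfReg<N>" string
  // attributes on the same function and can be read back the same way.
  outs() << "frame-reserved-slots = " << ReserveSize << "\n";
  return 0;
}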
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index 08b7b0e1f9560519600776016b6cbf8ec8ce85a4..1369e572cb663aad0198c952d76574a3d2221e11 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -3920,6 +3920,25 @@ LLVMValueRef LLVMBuildCall2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn, makeArrayRef(unwrap(Args), NumArgs), Name)); } +#ifdef ARK_GC_SUPPORT +LLVMValueRef LLVMBuildCall3(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn, + LLVMValueRef *Args, unsigned NumArgs, + const char *Name, LLVMValueRef *deoptVals, + int NumVals) { + FunctionType *FTy = unwrap(Ty); + std::vector vals; + for (int i = 0; i < NumVals; i++) { + vals.push_back(unwrap(deoptVals[i])); + } + OperandBundleDefT deoptBundle("deopt", vals); + + return wrap(unwrap(B)->CreateCall(FTy, unwrap(Fn), + makeArrayRef(unwrap(Args), NumArgs), // Args + {deoptBundle}, // ArrayRef + Name)); +} +#endif + LLVMValueRef LLVMBuildSelect(LLVMBuilderRef B, LLVMValueRef If, LLVMValueRef Then, LLVMValueRef Else, const char *Name) { diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp index bf1a809e56336b106b956cf1063ee179c2f61d3e..8f3973ec9d32eca5ca24b5b9d409998d0ce7a60e 100644 --- a/llvm/lib/IR/LLVMContext.cpp +++ b/llvm/lib/IR/LLVMContext.cpp @@ -52,6 +52,10 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { (void)ID; } +#ifdef ARK_GC_SUPPORT + setOpaquePointers(false); +#endif + auto *DeoptEntry = pImpl->getOrInsertBundleTag("deopt"); assert(DeoptEntry->second == LLVMContext::OB_deopt && "deopt operand bundle id drifted!"); diff --git a/llvm/lib/Target/AArch64/AArch64ArkGc.td b/llvm/lib/Target/AArch64/AArch64ArkGc.td new file mode 100644 index 0000000000000000000000000000000000000000..9615d02f4cec5f0d9a350d9e466adc7e0bb9112b --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64ArkGc.td @@ -0,0 +1,1291 @@ +//=- AArch64ArkGc.td - Describe the AArch64 Target Machine --------*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing. +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// AArch64 Subtarget features. +// + +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", + "Enable ARMv8 FP">; + +def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", + "Enable Advanced SIMD instructions", [FeatureFPARMv8]>; + +def FeatureSM4 : SubtargetFeature< + "sm4", "HasSM4", "true", + "Enable SM3 and SM4 support", [FeatureNEON]>; + +def FeatureSHA2 : SubtargetFeature< + "sha2", "HasSHA2", "true", + "Enable SHA1 and SHA256 support", [FeatureNEON]>; + +def FeatureSHA3 : SubtargetFeature< + "sha3", "HasSHA3", "true", + "Enable SHA512 and SHA3 support", [FeatureNEON, FeatureSHA2]>; + +def FeatureAES : SubtargetFeature< + "aes", "HasAES", "true", + "Enable AES support", [FeatureNEON]>; + +// Crypto has been split up and any combination is now valid (see the +// crypto definitions above). 
Also, crypto is now context sensitive: +// it has a different meaning for e.g. Armv8.4 than it has for Armv8.2. +// Therefore, we rely on Clang, the user interacing tool, to pass on the +// appropriate crypto options. But here in the backend, crypto has very little +// meaning anymore. We kept the Crypto definition here for backward +// compatibility, and now imply features SHA2 and AES, which was the +// "traditional" meaning of Crypto. +def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable cryptographic instructions", [FeatureNEON, FeatureSHA2, FeatureAES]>; + +def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", + "Enable ARMv8 CRC-32 checksum instructions">; + +def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", + "Enable ARMv8 Reliability, Availability and Serviceability Extensions">; + +def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true", + "Enable ARMv8.1 Large System Extension (LSE) atomic instructions">; + +def FeatureLSE2 : SubtargetFeature<"lse2", "HasLSE2", "true", + "Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules">; + +def FeatureLDAPR : SubtargetFeature<"ldapr", "HasLDAPR", "true", + "Use LDAPR to lower atomic loads; experimental until we " + "have more testing/a formal correctness proof">; + +def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true", + "Enable out of line atomics to support LSE instructions">; + +def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true", + "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">; + +def FeaturePAN : SubtargetFeature< + "pan", "HasPAN", "true", + "Enables ARM v8.1 Privileged Access-Never extension">; + +def FeatureLOR : SubtargetFeature< + "lor", "HasLOR", "true", + "Enables ARM v8.1 Limited Ordering Regions extension">; + +def FeatureCONTEXTIDREL2 : SubtargetFeature<"CONTEXTIDREL2", "HasCONTEXTIDREL2", + "true", "Enable RW operand CONTEXTIDR_EL2" >; + +def FeatureVH : SubtargetFeature<"vh", "HasVH", "true", + "Enables ARM v8.1 Virtual Host extension", [FeatureCONTEXTIDREL2] >; + +def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", + "Enable ARMv8 PMUv3 Performance Monitors extension">; + +def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", + "Full FP16", [FeatureFPARMv8]>; + +def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true", + "Enable FP16 FML instructions", [FeatureFullFP16]>; + +def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true", + "Enable Statistical Profiling extension">; + +def FeaturePAN_RWV : SubtargetFeature< + "pan-rwv", "HasPAN_RWV", "true", + "Enable v8.2 PAN s1e1R and s1e1W Variants", + [FeaturePAN]>; + +// UAO PState +def FeaturePsUAO : SubtargetFeature< "uaops", "HasPsUAO", "true", + "Enable v8.2 UAO PState">; + +def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP", + "true", "Enable v8.2 data Cache Clean to Point of Persistence" >; + +def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true", + "Enable Scalable Vector Extension (SVE) instructions", [FeatureFullFP16]>; + +// This flag is currently still labeled as Experimental, but when fully +// implemented this should tell the compiler to use the zeroing pseudos to +// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive +// lanes are known to be zero. The pseudos will then be expanded using the +// MOVPRFX instruction to zero the inactive lanes. 
This feature should only be +// enabled if MOVPRFX instructions are known to merge with the destructive +// operations they prefix. +// +// This feature could similarly be extended to support cheap merging of _any_ +// value into the inactive lanes using the MOVPRFX instruction that uses +// merging-predication. +def FeatureExperimentalZeroingPseudos + : SubtargetFeature<"use-experimental-zeroing-pseudos", + "UseExperimentalZeroingPseudos", "true", + "Hint to the compiler that the MOVPRFX instruction is " + "merged with destructive operations", + []>; + +def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl", + "UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">; + +def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true", + "Enable Scalable Vector Extension 2 (SVE2) instructions", + [FeatureSVE, FeatureUseScalarIncVL]>; + +def FeatureSVE2AES : SubtargetFeature<"sve2-aes", "HasSVE2AES", "true", + "Enable AES SVE2 instructions", [FeatureSVE2, FeatureAES]>; + +def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true", + "Enable SM4 SVE2 instructions", [FeatureSVE2, FeatureSM4]>; + +def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true", + "Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>; + +def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "true", + "Enable bit permutation SVE2 instructions", [FeatureSVE2]>; + +def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", + "Has zero-cycle register moves">; + +def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true", + "Has zero-cycle zeroing instructions for generic registers">; + +// It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0". +// as movi is more efficient across all cores. Newer cores can eliminate +// fmovs early and there is no difference with movi, but this not true for +// all implementations. +def FeatureNoZCZeroingFP : SubtargetFeature<"no-zcz-fp", "HasZeroCycleZeroingFP", "false", + "Has no zero-cycle zeroing instructions for FP registers">; + +def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", + "Has zero-cycle zeroing instructions", + [FeatureZCZeroingGP]>; + +/// ... but the floating-point version doesn't quite work in rare cases on older +/// CPUs. 
+def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround", + "HasZeroCycleZeroingFPWorkaround", "true", + "The zero-cycle floating-point zeroing instruction has a bug">; + +def FeatureStrictAlign : SubtargetFeature<"strict-align", + "RequiresStrictAlign", "true", + "Disallow all unaligned memory " + "access">; + +foreach i = {1-7,9-15,18,20-30} in // OHOS_LOCAL + def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true", + "Reserve X"#i#", making it unavailable " + "as a GPR">; + +foreach i = {8-15,18} in + def FeatureCallSavedX#i : SubtargetFeature<"call-saved-x"#i, + "CustomCallSavedXRegs["#i#"]", "true", "Make X"#i#" callee saved.">; + +def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps", + "true", + "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">; + +def FeaturePredictableSelectIsExpensive : SubtargetFeature< + "predictable-select-expensive", "PredictableSelectIsExpensive", "true", + "Prefer likely predicted branches over selects">; + +def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move", + "HasCustomCheapAsMoveHandling", "true", + "Use custom handling of cheap instructions">; + +def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move", + "HasExynosCheapAsMoveHandling", "true", + "Use Exynos specific handling of cheap instructions", + [FeatureCustomCheapAsMoveHandling]>; + +def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", + "UsePostRAScheduler", "true", "Schedule again after register allocation">; + +def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store", + "IsMisaligned128StoreSlow", "true", "Misaligned 128 bit stores are slow">; + +def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128", + "IsPaired128Slow", "true", "Paired 128 bit loads and stores are slow">; + +def FeatureAscendStoreAddress : SubtargetFeature<"ascend-store-address", + "IsStoreAddressAscend", "true", + "Schedule vector stores by ascending address">; + +def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow", + "true", "STR of Q register with register offset is slow">; + +def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature< + "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern", + "true", "Use alternative pattern for sextload convert to f32">; + +def FeatureArithmeticBccFusion : SubtargetFeature< + "arith-bcc-fusion", "HasArithmeticBccFusion", "true", + "CPU fuses arithmetic+bcc operations">; + +def FeatureArithmeticCbzFusion : SubtargetFeature< + "arith-cbz-fusion", "HasArithmeticCbzFusion", "true", + "CPU fuses arithmetic + cbz/cbnz operations">; + +def FeatureCmpBccFusion : SubtargetFeature< + "cmp-bcc-fusion", "HasCmpBccFusion", "true", + "CPU fuses cmp+bcc operations">; + +def FeatureFuseAddress : SubtargetFeature< + "fuse-address", "HasFuseAddress", "true", + "CPU fuses address generation and memory operations">; + +def FeatureFuseAES : SubtargetFeature< + "fuse-aes", "HasFuseAES", "true", + "CPU fuses AES crypto operations">; + +def FeatureFuseArithmeticLogic : SubtargetFeature< + "fuse-arith-logic", "HasFuseArithmeticLogic", "true", + "CPU fuses arithmetic and logic operations">; + +def FeatureFuseCCSelect : SubtargetFeature< + "fuse-csel", "HasFuseCCSelect", "true", + "CPU fuses conditional select operations">; + +def FeatureFuseCryptoEOR : SubtargetFeature< + "fuse-crypto-eor", "HasFuseCryptoEOR", "true", + "CPU fuses AES/PMULL and EOR operations">; + +def 
FeatureFuseAdrpAdd : SubtargetFeature< + "fuse-adrp-add", "HasFuseAdrpAdd", "true", + "CPU fuses adrp+add operations">; + +def FeatureFuseLiterals : SubtargetFeature< + "fuse-literals", "HasFuseLiterals", "true", + "CPU fuses literal generation operations">; + +def FeatureDisableLatencySchedHeuristic : SubtargetFeature< + "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", + "Disable latency scheduling heuristic">; + +def FeatureForce32BitJumpTables + : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true", + "Force jump table entries to be 32-bits wide except at MinSize">; + +def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true", + "Enable support for RCPC extension">; + +def FeatureUseRSqrt : SubtargetFeature< + "use-reciprocal-square-root", "UseRSqrt", "true", + "Use the reciprocal square root approximation">; + +def FeatureDotProd : SubtargetFeature< + "dotprod", "HasDotProd", "true", + "Enable dot product support">; + +def FeaturePAuth : SubtargetFeature< + "pauth", "HasPAuth", "true", + "Enable v8.3-A Pointer Authentication extension">; + +def FeatureJS : SubtargetFeature< + "jsconv", "HasJS", "true", + "Enable v8.3-A JavaScript FP conversion instructions", + [FeatureFPARMv8]>; + +def FeatureCCIDX : SubtargetFeature< + "ccidx", "HasCCIDX", "true", + "Enable v8.3-A Extend of the CCSIDR number of sets">; + +def FeatureComplxNum : SubtargetFeature< + "complxnum", "HasComplxNum", "true", + "Enable v8.3-A Floating-point complex number support", + [FeatureNEON]>; + +def FeatureNV : SubtargetFeature< + "nv", "HasNV", "true", + "Enable v8.4-A Nested Virtualization Enchancement">; + +def FeatureMPAM : SubtargetFeature< + "mpam", "HasMPAM", "true", + "Enable v8.4-A Memory system Partitioning and Monitoring extension">; + +def FeatureDIT : SubtargetFeature< + "dit", "HasDIT", "true", + "Enable v8.4-A Data Independent Timing instructions">; + +def FeatureTRACEV8_4 : SubtargetFeature< + "tracev8.4", "HasTRACEV8_4", "true", + "Enable v8.4-A Trace extension">; + +def FeatureAM : SubtargetFeature< + "am", "HasAM", "true", + "Enable v8.4-A Activity Monitors extension">; + +def FeatureAMVS : SubtargetFeature< + "amvs", "HasAMVS", "true", + "Enable v8.6-A Activity Monitors Virtualization support", + [FeatureAM]>; + +def FeatureSEL2 : SubtargetFeature< + "sel2", "HasSEL2", "true", + "Enable v8.4-A Secure Exception Level 2 extension">; + +def FeatureTLB_RMI : SubtargetFeature< + "tlb-rmi", "HasTLB_RMI", "true", + "Enable v8.4-A TLB Range and Maintenance Instructions">; + +def FeatureFlagM : SubtargetFeature< + "flagm", "HasFlagM", "true", + "Enable v8.4-A Flag Manipulation Instructions">; + +// 8.4 RCPC enchancements: LDAPR & STLR instructions with Immediate Offset +def FeatureRCPC_IMMO : SubtargetFeature<"rcpc-immo", "HasRCPC_IMMO", "true", + "Enable v8.4-A RCPC instructions with Immediate Offsets", + [FeatureRCPC]>; + +def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", + "NegativeImmediates", "false", + "Convert immediates and instructions " + "to their negated or complemented " + "equivalent when the immediate does " + "not fit in the encoding.">; + +def FeatureLSLFast : SubtargetFeature< + "lsl-fast", "HasLSLFast", "true", + "CPU has a fastpath logical shift of up to 3 places">; + +def FeatureAggressiveFMA : + SubtargetFeature<"aggressive-fma", + "HasAggressiveFMA", + "true", + "Enable Aggressive FMA for floating-point.">; + +def FeatureAltFPCmp : SubtargetFeature<"altnzcv", "HasAlternativeNZCV", "true", + "Enable alternative 
NZCV format for floating point comparisons">; + +def FeatureFRInt3264 : SubtargetFeature<"fptoint", "HasFRInt3264", "true", + "Enable FRInt[32|64][Z|X] instructions that round a floating-point number to " + "an integer (in FP format) forcing it to fit into a 32- or 64-bit int" >; + +def FeatureSpecRestrict : SubtargetFeature<"specrestrict", "HasSpecRestrict", + "true", "Enable architectural speculation restriction" >; + +def FeatureSB : SubtargetFeature<"sb", "HasSB", + "true", "Enable v8.5 Speculation Barrier" >; + +def FeatureSSBS : SubtargetFeature<"ssbs", "HasSSBS", + "true", "Enable Speculative Store Bypass Safe bit" >; + +def FeaturePredRes : SubtargetFeature<"predres", "HasPredRes", "true", + "Enable v8.5a execution and data prediction invalidation instructions" >; + +def FeatureCacheDeepPersist : SubtargetFeature<"ccdp", "HasCCDP", + "true", "Enable v8.5 Cache Clean to Point of Deep Persistence" >; + +def FeatureBranchTargetId : SubtargetFeature<"bti", "HasBTI", + "true", "Enable Branch Target Identification" >; + +def FeatureRandGen : SubtargetFeature<"rand", "HasRandGen", + "true", "Enable Random Number generation instructions" >; + +def FeatureMTE : SubtargetFeature<"mte", "HasMTE", + "true", "Enable Memory Tagging Extension" >; + +def FeatureTRBE : SubtargetFeature<"trbe", "HasTRBE", + "true", "Enable Trace Buffer Extension">; + +def FeatureETE : SubtargetFeature<"ete", "HasETE", + "true", "Enable Embedded Trace Extension", + [FeatureTRBE]>; + +def FeatureTME : SubtargetFeature<"tme", "HasTME", + "true", "Enable Transactional Memory Extension" >; + +def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals", + "AllowTaggedGlobals", + "true", "Use an instruction sequence for taking the address of a global " + "that allows a memory tag in the upper address bits">; + +def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", + "true", "Enable BFloat16 Extension" >; + +def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8", + "true", "Enable Matrix Multiply Int8 Extension">; + +def FeatureMatMulFP32 : SubtargetFeature<"f32mm", "HasMatMulFP32", + "true", "Enable Matrix Multiply FP32 Extension", [FeatureSVE]>; + +def FeatureMatMulFP64 : SubtargetFeature<"f64mm", "HasMatMulFP64", + "true", "Enable Matrix Multiply FP64 Extension", [FeatureSVE]>; + +def FeatureXS : SubtargetFeature<"xs", "HasXS", + "true", "Enable Armv8.7-A limited-TLB-maintenance instruction">; + +def FeatureWFxT : SubtargetFeature<"wfxt", "HasWFxT", + "true", "Enable Armv8.7-A WFET and WFIT instruction">; + +def FeatureHCX : SubtargetFeature< + "hcx", "HasHCX", "true", "Enable Armv8.7-A HCRX_EL2 system register">; + +def FeatureLS64 : SubtargetFeature<"ls64", "HasLS64", + "true", "Enable Armv8.7-A LD64B/ST64B Accelerator Extension">; + +def FeatureHBC : SubtargetFeature<"hbc", "HasHBC", + "true", "Enable Armv8.8-A Hinted Conditional Branches Extension">; + +def FeatureMOPS : SubtargetFeature<"mops", "HasMOPS", + "true", "Enable Armv8.8-A memcpy and memset acceleration instructions">; + +def FeatureBRBE : SubtargetFeature<"brbe", "HasBRBE", + "true", "Enable Branch Record Buffer Extension">; + +def FeatureSPE_EEF : SubtargetFeature<"spe-eef", "HasSPE_EEF", + "true", "Enable extra register in the Statistical Profiling Extension">; + +def FeatureFineGrainedTraps : SubtargetFeature<"fgt", "HasFineGrainedTraps", + "true", "Enable fine grained virtualization traps extension">; + +def FeatureEnhancedCounterVirtualization : + SubtargetFeature<"ecv", "HasEnhancedCounterVirtualization", + "true", "Enable enhanced 
counter virtualization extension">; + +def FeatureRME : SubtargetFeature<"rme", "HasRME", + "true", "Enable Realm Management Extension">; + +def FeatureSME : SubtargetFeature<"sme", "HasSME", "true", + "Enable Scalable Matrix Extension (SME)", [FeatureBF16, FeatureUseScalarIncVL]>; + +def FeatureSMEF64 : SubtargetFeature<"sme-f64", "HasSMEF64", "true", + "Enable Scalable Matrix Extension (SME) F64F64 instructions", [FeatureSME]>; + +def FeatureSMEI64 : SubtargetFeature<"sme-i64", "HasSMEI64", "true", + "Enable Scalable Matrix Extension (SME) I16I64 instructions", [FeatureSME]>; + +def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true", + "Apple A7 (the CPU formerly known as Cyclone)">; + +def FeatureEL2VMSA : SubtargetFeature<"el2vmsa", "HasEL2VMSA", "true", + "Enable Exception Level 2 Virtual Memory System Architecture">; + +def FeatureEL3 : SubtargetFeature<"el3", "HasEL3", "true", + "Enable Exception Level 3">; + +def FeatureFixCortexA53_835769 : SubtargetFeature<"fix-cortex-a53-835769", + "FixCortexA53_835769", "true", "Mitigate Cortex-A53 Erratum 835769">; + +def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice", + "NoBTIAtReturnTwice", "true", + "Don't place a BTI instruction " + "after a return-twice">; + +//===----------------------------------------------------------------------===// +// Architectures. +// +def HasV8_0aOps : SubtargetFeature<"v8a", "HasV8_0aOps", "true", + "Support ARM v8.0a instructions", [FeatureEL2VMSA, FeatureEL3]>; + +def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", + "Support ARM v8.1a instructions", [HasV8_0aOps, FeatureCRC, FeatureLSE, + FeatureRDM, FeaturePAN, FeatureLOR, FeatureVH]>; + +def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", + "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO, + FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>; + +def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", + "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePAuth, + FeatureJS, FeatureCCIDX, FeatureComplxNum]>; + +def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", + "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd, + FeatureNV, FeatureMPAM, FeatureDIT, + FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI, + FeatureFlagM, FeatureRCPC_IMMO, FeatureLSE2]>; + +def HasV8_5aOps : SubtargetFeature< + "v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions", + [HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict, + FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist, + FeatureBranchTargetId]>; + +def HasV8_6aOps : SubtargetFeature< + "v8.6a", "HasV8_6aOps", "true", "Support ARM v8.6a instructions", + [HasV8_5aOps, FeatureAMVS, FeatureBF16, FeatureFineGrainedTraps, + FeatureEnhancedCounterVirtualization, FeatureMatMulInt8]>; + +def HasV8_7aOps : SubtargetFeature< + "v8.7a", "HasV8_7aOps", "true", "Support ARM v8.7a instructions", + [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX]>; + +def HasV8_8aOps : SubtargetFeature< + "v8.8a", "HasV8_8aOps", "true", "Support ARM v8.8a instructions", + [HasV8_7aOps, FeatureHBC, FeatureMOPS]>; + +def HasV9_0aOps : SubtargetFeature< + "v9a", "HasV9_0aOps", "true", "Support ARM v9a instructions", + [HasV8_5aOps, FeatureSVE2]>; + +def HasV9_1aOps : SubtargetFeature< + "v9.1a", "HasV9_1aOps", "true", "Support ARM v9.1a instructions", + [HasV8_6aOps, HasV9_0aOps]>; + +def HasV9_2aOps : SubtargetFeature< + "v9.2a", "HasV9_2aOps", "true", "Support ARM 
v9.2a instructions", + [HasV8_7aOps, HasV9_1aOps]>; + +def HasV9_3aOps : SubtargetFeature< + "v9.3a", "HasV9_3aOps", "true", "Support ARM v9.3a instructions", + [HasV8_8aOps, HasV9_2aOps]>; + +def HasV8_0rOps : SubtargetFeature< + "v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions", + [//v8.1 + FeatureCRC, FeaturePAN, FeatureRDM, FeatureLSE, FeatureCONTEXTIDREL2, + //v8.2 + FeatureRAS, FeaturePsUAO, FeatureCCPP, FeaturePAN_RWV, + //v8.3 + FeatureComplxNum, FeatureCCIDX, FeatureJS, + FeaturePAuth, FeatureRCPC, + //v8.4 + FeatureDotProd, FeatureTRACEV8_4, FeatureTLB_RMI, + FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO, + // Not mandatory in v8.0-R, but included here on the grounds that it + // only enables names of system registers + FeatureSpecRestrict + ]>; + +// Only intended to be used by disassemblers. +def FeatureAll + : SubtargetFeature<"all", "IsAll", "true", "Enable all instructions", []>; + +class AssemblerPredicateWithAll + : AssemblerPredicate<(any_of FeatureAll, cond), name>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "AArch64RegisterInfo.td" +include "AArch64RegisterBanks.td" +include "AArch64ArkGcCallingConvention.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "AArch64Schedule.td" +include "AArch64InstrInfo.td" +include "AArch64SchedPredicates.td" +include "AArch64SchedPredExynos.td" +include "AArch64SchedPredAmpere.td" +include "AArch64Combine.td" + +def AArch64InstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// Named operands for MRS/MSR/TLBI/... +//===----------------------------------------------------------------------===// + +include "AArch64SystemOperands.td" + +//===----------------------------------------------------------------------===// +// Access to privileged registers +//===----------------------------------------------------------------------===// + +foreach i = 1-3 in +def FeatureUseEL#i#ForTP : SubtargetFeature<"tpidr-el"#i, "UseEL"#i#"ForTP", + "true", "Permit use of TPIDR_EL"#i#" for the TLS base">; + +//===----------------------------------------------------------------------===// +// Control codegen mitigation against Straight Line Speculation vulnerability. +//===----------------------------------------------------------------------===// + +def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr", + "HardenSlsRetBr", "true", + "Harden against straight line speculation across RET and BR instructions">; +def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr", + "HardenSlsBlr", "true", + "Harden against straight line speculation across BLR instructions">; +def FeatureHardenSlsNoComdat : SubtargetFeature<"harden-sls-nocomdat", + "HardenSlsNoComdat", "true", + "Generate thunk code for SLS mitigation in the normal text section">; + +//===----------------------------------------------------------------------===// +// AArch64 Processors supported. 
+// + +//===----------------------------------------------------------------------===// +// Unsupported features to disable for scheduling models +//===----------------------------------------------------------------------===// + +class AArch64Unsupported { list F; } + +def SVEUnsupported : AArch64Unsupported { + let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, + HasSVE2BitPerm, HasSVEorSME, HasSVE2orSME]; +} + +def PAUnsupported : AArch64Unsupported { + let F = [HasPAuth]; +} + +def SMEUnsupported : AArch64Unsupported { + let F = [HasSME, HasSMEF64, HasSMEI64]; +} + +include "AArch64SchedA53.td" +include "AArch64SchedA55.td" +include "AArch64SchedA57.td" +include "AArch64SchedCyclone.td" +include "AArch64SchedFalkor.td" +include "AArch64SchedKryo.td" +include "AArch64SchedExynosM3.td" +include "AArch64SchedExynosM4.td" +include "AArch64SchedExynosM5.td" +include "AArch64SchedThunderX.td" +include "AArch64SchedThunderX2T99.td" +include "AArch64SchedA64FX.td" +include "AArch64SchedThunderX3T110.td" +include "AArch64SchedTSV110.td" +include "AArch64SchedAmpere1.td" +include "AArch64SchedNeoverseN2.td" + +def TuneA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", + "Cortex-A35 ARM processors">; + +def TuneA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", + "Cortex-A53 ARM processors", [ + FeatureFuseAES, + FeatureBalanceFPOps, + FeatureCustomCheapAsMoveHandling, + FeaturePostRAScheduler]>; + +def TuneA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", + "Cortex-A55 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureFuseAddress]>; + +def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510", + "Cortex-A510 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler + ]>; + +def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", + "Cortex-A57 ARM processors", [ + FeatureFuseAES, + FeatureBalanceFPOps, + FeatureCustomCheapAsMoveHandling, + FeatureFuseAdrpAdd, + FeatureFuseLiterals, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive]>; + +def TuneA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65", + "Cortex-A65 ARM processors", [ + FeatureFuseAES, + FeatureFuseAddress, + FeatureFuseAdrpAdd, + FeatureFuseLiterals]>; + +def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", + "Cortex-A72 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureFuseLiterals]>; + +def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", + "Cortex-A73 ARM processors", [ + FeatureFuseAES]>; + +def TuneA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", + "Cortex-A75 ARM processors", [ + FeatureFuseAES]>; + +def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", + "Cortex-A76 ARM processors", [ + FeatureFuseAES]>; + +def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", + "Cortex-A77 ARM processors", [ + FeatureCmpBccFusion, + FeatureFuseAES]>; + +def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78", + "Cortex-A78 ARM processors", [ + FeatureCmpBccFusion, + FeatureFuseAES, + FeaturePostRAScheduler]>; + +def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily", + "CortexA78C", + "Cortex-A78C ARM processors", [ + FeatureCmpBccFusion, + FeatureFuseAES, + FeaturePostRAScheduler]>; + +def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710", + "Cortex-A710 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureCmpBccFusion]>; + +def TuneR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily", + "CortexR82", + 
"Cortex-R82 ARM processors", [ + FeaturePostRAScheduler]>; + +def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", + "Cortex-X1 ARM processors", [ + FeatureCmpBccFusion, + FeatureFuseAES, + FeaturePostRAScheduler]>; + +def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2", + "Cortex-X2 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureCmpBccFusion]>; + +def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX", + "Fujitsu A64FX processors", [ + FeaturePostRAScheduler, + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeaturePredictableSelectIsExpensive + ]>; + +def TuneCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel", + "Nvidia Carmel processors">; + +// Note that cyclone does not fuse AES instructions, but newer apple chips do +// perform the fusion and cyclone is used by default when targetting apple OSes. +def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7", + "Apple A7 (the CPU formerly known as Cyclone)", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAES, FeatureFuseCryptoEOR, + FeatureZCRegMove, + FeatureZCZeroing, + FeatureZCZeroingFPWorkaround] + >; + +def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10", + "Apple A10", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureZCRegMove, + FeatureZCZeroing] + >; + +def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11", + "Apple A11", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureZCRegMove, + FeatureZCZeroing] + >; + +def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12", + "Apple A12", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureZCRegMove, + FeatureZCZeroing] + >; + +def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13", + "Apple A13", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureZCRegMove, + FeatureZCZeroing] + >; + +def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14", + "Apple A14", [ + FeatureAggressiveFMA, + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseArithmeticLogic, + FeatureFuseCCSelect, + FeatureFuseCryptoEOR, + FeatureFuseAdrpAdd, + FeatureFuseLiterals, + FeatureZCRegMove, + FeatureZCZeroing]>; + +def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", + "Samsung Exynos-M3 processors", + [FeatureExynosCheapAsMoveHandling, + FeatureForce32BitJumpTables, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseCCSelect, + FeatureFuseAdrpAdd, + FeatureFuseLiterals, + FeatureLSLFast, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive]>; + +// Re-uses some scheduling and tunings from the ExynosM3 proc family. 
+def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3", + "Samsung Exynos-M4 processors", + [FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureExynosCheapAsMoveHandling, + FeatureForce32BitJumpTables, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseArithmeticLogic, + FeatureFuseCCSelect, + FeatureFuseAdrpAdd, + FeatureFuseLiterals, + FeatureLSLFast, + FeaturePostRAScheduler, + FeatureZCZeroing]>; + +def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", + "Qualcomm Kryo processors", [ + FeatureCustomCheapAsMoveHandling, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing, + FeatureLSLFast] + >; + +def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", + "Qualcomm Falkor processors", [ + FeatureCustomCheapAsMoveHandling, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing, + FeatureLSLFast, + FeatureSlowSTRQro + ]>; + +def TuneNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", "NeoverseE1", + "Neoverse E1 ARM processors", [ + FeaturePostRAScheduler, + FeatureFuseAES + ]>; + +def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1", + "Neoverse N1 ARM processors", [ + FeaturePostRAScheduler, + FeatureFuseAES + ]>; + +def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2", + "Neoverse N2 ARM processors", [ + FeaturePostRAScheduler, + FeatureFuseAES + ]>; +def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Neoverse512TVB", + "Neoverse 512-TVB ARM processors", [ + FeaturePostRAScheduler, + FeatureFuseAES + ]>; + +def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1", + "Neoverse V1 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler]>; + +def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", + "Qualcomm Saphira processors", [ + FeatureCustomCheapAsMoveHandling, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing, + FeatureLSLFast]>; + +def TuneThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", "ThunderX2T99", + "Cavium ThunderX2 processors", [ + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive]>; + +def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily", + "ThunderX3T110", + "Marvell ThunderX3 processors", [ + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureBalanceFPOps, + FeatureStrictAlign]>; + +def TuneThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX", + "Cavium ThunderX processors", [ + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive]>; + +def TuneThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily", + "ThunderXT88", + "Cavium ThunderX processors", [ + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive]>; + +def TuneThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily", + "ThunderXT81", + "Cavium ThunderX processors", [ + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive]>; + +def TuneThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily", + "ThunderXT83", + "Cavium ThunderX processors", [ + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive]>; + +def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110", + "HiSilicon TS-V110 processors", [ + FeatureCustomCheapAsMoveHandling, + FeatureFuseAES, + 
FeaturePostRAScheduler]>; + +def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1", + "Ampere Computing Ampere-1 processors", [ + FeaturePostRAScheduler, + FeatureFuseAES, + FeatureLSLFast, + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeatureCmpBccFusion, + FeatureFuseAddress, + FeatureFuseLiterals]>; + +def ProcessorFeatures { + list A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto, + FeatureFPARMv8, FeatureNEON, FeaturePerfMon]; + list A55 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureRCPC, FeaturePerfMon]; + list A510 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, + FeatureMatMulInt8, FeatureBF16, FeatureAM, + FeatureMTE, FeatureETE, FeatureSVE2BitPerm, + FeatureFP16FML]; + list A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureRCPC, FeatureSSBS, FeatureRAS, + FeaturePerfMon]; + list A76 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureRCPC, FeatureSSBS, FeaturePerfMon]; + list A77 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureRCPC, FeaturePerfMon, FeatureSSBS]; + list A78 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureRCPC, FeaturePerfMon, FeatureSPE, + FeatureSSBS]; + list A78C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureFlagM, FeatureFP16FML, FeaturePAuth, + FeaturePerfMon, FeatureRCPC, FeatureSPE, + FeatureSSBS]; + list A710 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, + FeatureETE, FeatureMTE, FeatureFP16FML, + FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8]; + list R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16, + FeatureFP16FML, FeatureSSBS, FeaturePredRes, + FeatureSB]; + list X1 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureRCPC, FeaturePerfMon, + FeatureSPE, FeatureFullFP16, FeatureDotProd, + FeatureSSBS]; + list X1C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureRCPC, FeaturePerfMon, + FeatureSPE, FeatureFullFP16, FeatureDotProd, + FeaturePAuth, FeatureSSBS]; + list X2 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, + FeatureMatMulInt8, FeatureBF16, FeatureAM, + FeatureMTE, FeatureETE, FeatureSVE2BitPerm, + FeatureFP16FML]; + list A64FX = [HasV8_2aOps, FeatureFPARMv8, FeatureNEON, + FeatureSHA2, FeaturePerfMon, FeatureFullFP16, + FeatureSVE, FeatureComplxNum]; + list Carmel = [HasV8_2aOps, FeatureNEON, FeatureCrypto, + FeatureFullFP16]; + list AppleA7 = [HasV8_0aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON,FeaturePerfMon, FeatureAppleA7SysReg]; + list AppleA10 = [HasV8_0aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureCRC, + FeatureRDM, FeaturePAN, FeatureLOR, FeatureVH]; + list AppleA11 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureFullFP16]; + list AppleA12 = [HasV8_3aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureFullFP16]; + list AppleA13 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureFullFP16, + FeatureFP16FML, FeatureSHA3]; + list AppleA14 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureFRInt3264, + FeatureSpecRestrict, FeatureSSBS, FeatureSB, + FeaturePredRes, FeatureCacheDeepPersist, + FeatureFullFP16, FeatureFP16FML, FeatureSHA3, + FeatureAltFPCmp]; + list ExynosM3 = [HasV8_0aOps, 
FeatureCRC, FeatureCrypto, + FeaturePerfMon]; + list ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, + FeatureFullFP16, FeaturePerfMon]; + list Falkor = [HasV8_0aOps, FeatureCRC, FeatureCrypto, + FeatureFPARMv8, FeatureNEON, FeaturePerfMon, + FeatureRDM]; + list NeoverseE1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, + FeatureFPARMv8, FeatureFullFP16, FeatureNEON, + FeatureRCPC, FeatureSSBS, FeaturePerfMon]; + list NeoverseN1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, + FeatureFPARMv8, FeatureFullFP16, FeatureNEON, + FeatureRCPC, FeatureSPE, FeatureSSBS, + FeaturePerfMon]; + list NeoverseN2 = [HasV8_5aOps, FeatureBF16, FeatureETE, + FeatureMatMulInt8, FeatureMTE, FeatureSVE2, + FeatureSVE2BitPerm, FeatureTRBE, FeatureCrypto, + FeaturePerfMon]; + list Neoverse512TVB = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist, + FeatureCrypto, FeatureFPARMv8, FeatureFP16FML, + FeatureFullFP16, FeatureMatMulInt8, FeatureNEON, + FeaturePerfMon, FeatureRandGen, FeatureSPE, + FeatureSSBS, FeatureSVE]; + list NeoverseV1 = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist, + FeatureCrypto, FeatureFPARMv8, FeatureFP16FML, + FeatureFullFP16, FeatureMatMulInt8, FeatureNEON, + FeaturePerfMon, FeatureRandGen, FeatureSPE, + FeatureSSBS, FeatureSVE]; + list Saphira = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureSPE, FeaturePerfMon]; + list ThunderX = [HasV8_0aOps, FeatureCRC, FeatureCrypto, + FeatureFPARMv8, FeaturePerfMon, FeatureNEON]; + list ThunderX2T99 = [HasV8_1aOps, FeatureCRC, FeatureCrypto, + FeatureFPARMv8, FeatureNEON, FeatureLSE]; + list ThunderX3T110 = [HasV8_3aOps, FeatureCRC, FeatureCrypto, + FeatureFPARMv8, FeatureNEON, FeatureLSE, + FeaturePAuth, FeaturePerfMon]; + list TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureSPE, + FeatureFullFP16, FeatureFP16FML, FeatureDotProd]; + list Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon, + FeatureMTE, FeatureSSBS]; + + // ETE and TRBE are future architecture extensions. We temporarily enable them + // by default for users targeting generic AArch64. The extensions do not + // affect code generated by the compiler and can be used only by explicitly + // mentioning the new system register names in assembly. + list Generic = [FeatureFPARMv8, FeatureNEON, FeatureETE]; +} + +// FeatureFuseAdrpAdd is enabled under Generic to allow linker merging +// optimizations. 
+def : ProcessorModel<"generic", CortexA55Model, ProcessorFeatures.Generic, + [FeatureFuseAES, FeatureFuseAdrpAdd, FeaturePostRAScheduler]>; +def : ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53, + [TuneA35]>; +def : ProcessorModel<"cortex-a34", CortexA53Model, ProcessorFeatures.A53, + [TuneA35]>; +def : ProcessorModel<"cortex-a53", CortexA53Model, ProcessorFeatures.A53, + [TuneA53]>; +def : ProcessorModel<"cortex-a55", CortexA55Model, ProcessorFeatures.A55, + [TuneA55]>; +def : ProcessorModel<"cortex-a510", CortexA55Model, ProcessorFeatures.A510, + [TuneA510]>; +def : ProcessorModel<"cortex-a57", CortexA57Model, ProcessorFeatures.A53, + [TuneA57]>; +def : ProcessorModel<"cortex-a65", CortexA53Model, ProcessorFeatures.A65, + [TuneA65]>; +def : ProcessorModel<"cortex-a65ae", CortexA53Model, ProcessorFeatures.A65, + [TuneA65]>; +def : ProcessorModel<"cortex-a72", CortexA57Model, ProcessorFeatures.A53, + [TuneA72]>; +def : ProcessorModel<"cortex-a73", CortexA57Model, ProcessorFeatures.A53, + [TuneA73]>; +def : ProcessorModel<"cortex-a75", CortexA57Model, ProcessorFeatures.A55, + [TuneA75]>; +def : ProcessorModel<"cortex-a76", CortexA57Model, ProcessorFeatures.A76, + [TuneA76]>; +def : ProcessorModel<"cortex-a76ae", CortexA57Model, ProcessorFeatures.A76, + [TuneA76]>; +def : ProcessorModel<"cortex-a77", CortexA57Model, ProcessorFeatures.A77, + [TuneA77]>; +def : ProcessorModel<"cortex-a78", CortexA57Model, ProcessorFeatures.A78, + [TuneA78]>; +def : ProcessorModel<"cortex-a78c", CortexA57Model, ProcessorFeatures.A78C, + [TuneA78C]>; +def : ProcessorModel<"cortex-a710", NeoverseN2Model, ProcessorFeatures.A710, + [TuneA710]>; +def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82, + [TuneR82]>; +def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1, + [TuneX1]>; +def : ProcessorModel<"cortex-x1c", CortexA57Model, ProcessorFeatures.X1C, + [TuneX1]>; +def : ProcessorModel<"cortex-x2", NeoverseN2Model, ProcessorFeatures.X2, + [TuneX2]>; +def : ProcessorModel<"neoverse-e1", CortexA53Model, + ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>; +def : ProcessorModel<"neoverse-n1", CortexA57Model, + ProcessorFeatures.NeoverseN1, [TuneNeoverseN1]>; +def : ProcessorModel<"neoverse-n2", NeoverseN2Model, + ProcessorFeatures.NeoverseN2, [TuneNeoverseN2]>; +def : ProcessorModel<"neoverse-512tvb", NeoverseN2Model, + ProcessorFeatures.Neoverse512TVB, [TuneNeoverse512TVB]>; +def : ProcessorModel<"neoverse-v1", NeoverseN2Model, + ProcessorFeatures.NeoverseV1, [TuneNeoverseV1]>; +def : ProcessorModel<"exynos-m3", ExynosM3Model, ProcessorFeatures.ExynosM3, + [TuneExynosM3]>; +def : ProcessorModel<"exynos-m4", ExynosM4Model, ProcessorFeatures.ExynosM4, + [TuneExynosM4]>; +def : ProcessorModel<"exynos-m5", ExynosM5Model, ProcessorFeatures.ExynosM4, + [TuneExynosM4]>; +def : ProcessorModel<"falkor", FalkorModel, ProcessorFeatures.Falkor, + [TuneFalkor]>; +def : ProcessorModel<"saphira", FalkorModel, ProcessorFeatures.Saphira, + [TuneSaphira]>; +def : ProcessorModel<"kryo", KryoModel, ProcessorFeatures.A53, [TuneKryo]>; + +// Cavium ThunderX/ThunderX T8X Processors +def : ProcessorModel<"thunderx", ThunderXT8XModel, ProcessorFeatures.ThunderX, + [TuneThunderX]>; +def : ProcessorModel<"thunderxt88", ThunderXT8XModel, + ProcessorFeatures.ThunderX, [TuneThunderXT88]>; +def : ProcessorModel<"thunderxt81", ThunderXT8XModel, + ProcessorFeatures.ThunderX, [TuneThunderXT81]>; +def : ProcessorModel<"thunderxt83", ThunderXT8XModel, + ProcessorFeatures.ThunderX, 
[TuneThunderXT83]>; +// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan. +def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, + ProcessorFeatures.ThunderX2T99, [TuneThunderX2T99]>; +// Marvell ThunderX3T110 Processors. +def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, + ProcessorFeatures.ThunderX3T110, [TuneThunderX3T110]>; +def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110, + [TuneTSV110]>; + +// Support cyclone as an alias for apple-a7 so we can still LTO old bitcode. +def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7, + [TuneAppleA7]>; + +// iPhone and iPad CPUs +def : ProcessorModel<"apple-a7", CycloneModel, ProcessorFeatures.AppleA7, + [TuneAppleA7]>; +def : ProcessorModel<"apple-a8", CycloneModel, ProcessorFeatures.AppleA7, + [TuneAppleA7]>; +def : ProcessorModel<"apple-a9", CycloneModel, ProcessorFeatures.AppleA7, + [TuneAppleA7]>; +def : ProcessorModel<"apple-a10", CycloneModel, ProcessorFeatures.AppleA10, + [TuneAppleA10]>; +def : ProcessorModel<"apple-a11", CycloneModel, ProcessorFeatures.AppleA11, + [TuneAppleA11]>; +def : ProcessorModel<"apple-a12", CycloneModel, ProcessorFeatures.AppleA12, + [TuneAppleA12]>; +def : ProcessorModel<"apple-a13", CycloneModel, ProcessorFeatures.AppleA13, + [TuneAppleA13]>; +def : ProcessorModel<"apple-a14", CycloneModel, ProcessorFeatures.AppleA14, + [TuneAppleA14]>; + +// Mac CPUs +def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14, + [TuneAppleA14]>; + +// watch CPUs. +def : ProcessorModel<"apple-s4", CycloneModel, ProcessorFeatures.AppleA12, + [TuneAppleA12]>; +def : ProcessorModel<"apple-s5", CycloneModel, ProcessorFeatures.AppleA12, + [TuneAppleA12]>; + +// Alias for the latest Apple processor model supported by LLVM. +def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA14, + [TuneAppleA14]>; + +// Fujitsu A64FX +def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX, + [TuneA64FX]>; + +// Nvidia Carmel +def : ProcessorModel<"carmel", NoSchedModel, ProcessorFeatures.Carmel, + [TuneCarmel]>; + +// Ampere Computing +def : ProcessorModel<"ampere1", Ampere1Model, ProcessorFeatures.Ampere1, + [TuneAmpere1]>; + +//===----------------------------------------------------------------------===// +// Assembly parser +//===----------------------------------------------------------------------===// + +def GenericAsmParserVariant : AsmParserVariant { + int Variant = 0; + string Name = "generic"; + string BreakCharacters = "."; + string TokenizingCharacters = "[]*!/"; +} + +def AppleAsmParserVariant : AsmParserVariant { + int Variant = 1; + string Name = "apple-neon"; + string BreakCharacters = "."; + string TokenizingCharacters = "[]*!/"; +} + +//===----------------------------------------------------------------------===// +// Assembly printer +//===----------------------------------------------------------------------===// +// AArch64 Uses the MC printer for asm output, so make sure the TableGen +// AsmWriter bits get associated with the correct class. 
+def GenericAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; + bit isMCAsmWriter = 1; +} + +def AppleAsmWriter : AsmWriter { + let AsmWriterClassName = "AppleInstPrinter"; + int PassSubtarget = 1; + int Variant = 1; + int isMCAsmWriter = 1; +} + +//===----------------------------------------------------------------------===// +// Target Declaration +//===----------------------------------------------------------------------===// + +def AArch64 : Target { + let InstructionSet = AArch64InstrInfo; + let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant]; + let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter]; + let AllowRegisterRenaming = 1; +} + +//===----------------------------------------------------------------------===// +// Pfm Counters +//===----------------------------------------------------------------------===// + +include "AArch64PfmCounters.td" diff --git a/llvm/lib/Target/AArch64/AArch64ArkGcCallingConvention.td b/llvm/lib/Target/AArch64/AArch64ArkGcCallingConvention.td new file mode 100644 index 0000000000000000000000000000000000000000..5190ea36c3b68c48dd5ffda9270ebe0874145c52 --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64ArkGcCallingConvention.td @@ -0,0 +1,591 @@ +//=- AArch64ArkGcCallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for AArch64 architecture. +// +//===----------------------------------------------------------------------===// + +/// CCIfBigEndian - Match only if we're in big endian mode. +class CCIfBigEndian : + CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>; + +class CCIfILP32 : + CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>; + + +//===----------------------------------------------------------------------===// +// ARM AAPCS64 Calling Convention +//===----------------------------------------------------------------------===// + +let Entry = 1 in +def CC_AArch64_AAPCS : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32], CCBitConvertToType>, + + // Big endian vectors must be passed as if they were 1-element vectors so that + // their lanes are in a consistent order. + CCIfBigEndian>>, + CCIfBigEndian>>, + + // In AAPCS, an SRet is passed in X8, not X0 like a normal pointer parameter. + // However, on windows, in some circumstances, the SRet is passed in X0 or X1 + // instead. The presence of the inreg attribute indicates that SRet is + // passed in the alternative register (X0 or X1), not X8: + // - X0 for non-instance methods. + // - X1 for instance methods. + + // The "sret" attribute identifies indirect returns. + // The "inreg" attribute identifies non-aggregate types. + // The position of the "sret" attribute identifies instance/non-instance + // methods. + // "sret" on argument 0 means non-instance methods. + // "sret" on argument 1 means instance methods. + + CCIfInReg>>>>, + + CCIfSRet>>, + + // Put ByVal arguments directly on the stack. Minimum size and alignment of a + // slot is 64-bit. + CCIfByVal>, + + // The 'nest' parameter, if any, is passed in X18. 
+ // Darwin uses X18 as the platform register and hence 'nest' isn't currently + // supported there. + CCIfNest>, + + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf>>, + + // A SwiftError is passed in X21. + CCIfSwiftError>>, + + // Pass SwiftAsync in an otherwise callee saved register so that it will be + // preserved for normal function calls. + CCIfSwiftAsync>>, + + CCIfConsecutiveRegs>, + + CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, + nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64], + CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>, + CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, + nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64], + CCPassIndirect>, + + CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCAssignToReg<[P0, P1, P2, P3]>>, + CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCPassIndirect>, + + // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, + // up to eight each of GPR and FPR. + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>, + // i128 is split to two i64s, we can't fit half to register X7. + CCIfType<[i64], CCIfSplit>>, + + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfType<[i64], CCIfSplit>>, + + CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, + CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, + CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], + CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + // If more than will fit in registers, pass them on the stack instead. + CCIfType<[i1, i8, i16, f16, bf16], CCAssignToStack<8, 8>>, + CCIfType<[i32, f32], CCAssignToStack<8, 8>>, + CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16, v4bf16], + CCAssignToStack<8, 8>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToStack<16, 16>> +]>; + +let Entry = 1 in +def RetCC_AArch64_AAPCS : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32], CCBitConvertToType>, + + CCIfConsecutiveRegs>, + CCIfSwiftError>>, + + // Big endian vectors must be passed as if they were 1-element vectors so that + // their lanes are in a consistent order. 
+ CCIfBigEndian>>, + CCIfBigEndian>>, + + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, + CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, + CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], + CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, + nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64], + CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>, + + CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCAssignToReg<[P0, P1, P2, P3]>> +]>; + +// Vararg functions on windows pass floats in integer registers +let Entry = 1 in +def CC_AArch64_Win64_VarArg : CallingConv<[ + CCIfType<[f16, bf16], CCBitConvertToType>, + CCIfType<[f32], CCBitConvertToType>, + CCIfType<[f64], CCBitConvertToType>, + CCDelegateTo +]>; + +// Windows Control Flow Guard checks take a single argument (the target function +// address) and have no return value. +let Entry = 1 in +def CC_AArch64_Win64_CFGuard_Check : CallingConv<[ + CCIfType<[i64], CCAssignToReg<[X15]>> +]>; + + +// Darwin uses a calling convention which differs in only two ways +// from the standard one at this level: +// + i128s (i.e. split i64s) don't need even registers. +// + Stack slots are sized as needed rather than being at least 64-bit. +let Entry = 1 in +def CC_AArch64_DarwinPCS : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, + + // An SRet is passed in X8, not X0 like a normal pointer parameter. + CCIfSRet>>, + + // Put ByVal arguments directly on the stack. Minimum size and alignment of a + // slot is 64-bit. + CCIfByVal>, + + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf>>, + + // A SwiftError is passed in X21. + CCIfSwiftError>>, + + // Pass SwiftAsync in an otherwise callee saved register so that it will be + // preserved for normal function calls. + CCIfSwiftAsync>>, + + CCIfConsecutiveRegs>, + + // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, + // up to eight each of GPR and FPR. + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>, + // i128 is split to two i64s, we can't fit half to register X7. + CCIfType<[i64], + CCIfSplit>>, + // i128 is split to two i64s, and its stack alignment is 16 bytes. 
+ CCIfType<[i64], CCIfSplit>>, + + CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, + CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, + CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], + CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + // If more than will fit in registers, pass them on the stack instead. + CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>, + CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16 || ValVT == MVT::bf16", + CCAssignToStack<2, 2>>, + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + + // Re-demote pointers to 32-bits so we don't end up storing 64-bit + // values and clobbering neighbouring stack locations. Not very pretty. + CCIfPtr>>, + CCIfPtr>>, + + CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16, v4bf16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToStack<16, 16>> +]>; + +let Entry = 1 in +def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, + + CCIfConsecutiveRegs>, + + // Handle all scalar types as either i64 or f64. + CCIfType<[i8, i16, i32], CCPromoteToType>, + CCIfType<[f16, bf16, f32], CCPromoteToType>, + + // Everything is on the stack. + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfType<[i64], CCIfSplit>>, + CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToStack<16, 16>> +]>; + +// In the ILP32 world, the minimum stack slot size is 4 bytes. Otherwise the +// same as the normal Darwin VarArgs handling. +let Entry = 1 in +def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[ + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, + + // Handle all scalar types as either i32 or f32. + CCIfType<[i8, i16], CCPromoteToType>, + CCIfType<[f16, bf16], CCPromoteToType>, + + // Everything is on the stack. + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfPtr>>, + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64], CCIfSplit>>, + CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToStack<16, 16>> +]>; + + +// The WebKit_JS calling convention only passes the first argument (the callee) +// in register and the remaining arguments on stack. We allow 32bit stack slots, +// so that WebKit can write partial values in the stack and define the other +// 32bit quantity as undef. +let Entry = 1 in +def CC_AArch64_WebKit_JS : CallingConv<[ + // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToReg<[W0]>>, + CCIfType<[i64], CCAssignToReg<[X0]>>, + + // Pass the remaining arguments on the stack instead. 
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64, f64], CCAssignToStack<8, 8>> +]>; + +let Entry = 1 in +def RetCC_AArch64_WebKit_JS : CallingConv<[ + CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>> +]>; + +// OHOS_LOCAL begin +// Ark Conventions +let Entry = 1 in +def CC_AArch64_ArkInt : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W20, W21, W22, W23, W24, W25, W26, W28], [X20, X21, X22, X23, X24, X25, X26, X28]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X20, X21, X22, X23, X24, X25, X26, X28], [W20, W21, W22, W23, W24, W25, W26, W28]>>, +]>; + +let Entry = 1 in +def CC_AArch64_ArkFast0 : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W28, W29], [X28, FP]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X28, FP], [W28, W29]>>, +]>; + +let Entry = 1 in +def CC_AArch64_ArkFast1 : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W28, W29], [X0, X28, FP]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X28, FP], [W0, W28, W29]>>, +]>; + +let Entry = 1 in +def CC_AArch64_ArkFast2 : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W28, W29], [X0, X1, X28, FP]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X28, FP], [W0, W1, W28, W29]>>, +]>; + +let Entry = 1 in +def CC_AArch64_ArkFast3 : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W28, W29], [X0, X1, X2, X28, FP]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X28, FP], [W0, W1, W2, W28, W29]>>, +]>; + +let Entry = 1 in +def CC_AArch64_ArkFast4 : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W28, W29], [X0, X1, X2, X3, X28, FP]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X28, FP], [W0, W1, W2, W3, W28, W29]>>, +]>; + +let Entry = 1 in +def CC_AArch64_ArkFast5 : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W28, W29], [X0, X1, X2, X3, X4, X28, FP]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X28, FP], [W0, W1, W2, W3, W4, W28, W29]>>, +]>; + +let Entry = 1 in +def CC_AArch64_ArkResolver : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W16], [X16]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X16], [W16]>>, +]>; + +let Entry = 1 in +def RetCC_AArch64_ArkResolver : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W16], [X16]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X16], [W16]>>, +]>; +// OHOS_LOCAL end + +//===----------------------------------------------------------------------===// +// ARM64 Calling Convention for GHC +//===----------------------------------------------------------------------===// + +// This calling convention is specific to the Glasgow Haskell Compiler. 
+// The only documentation is the GHC source code, specifically the C header +// file: +// +// https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h +// +// which defines the registers for the Spineless Tagless G-Machine (STG) that +// GHC uses to implement lazy evaluation. The generic STG machine has a set of +// registers which are mapped to appropriate set of architecture specific +// registers for each CPU architecture. +// +// The STG Machine is documented here: +// +// https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode +// +// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI +// register mapping". + +let Entry = 1 in +def CC_AArch64_GHC : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType>, + + CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, + CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>, + CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>, + + // Promote i8/i16/i32 arguments to i64. + CCIfType<[i8, i16, i32], CCPromoteToType>, + + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim + CCIfType<[i64], CCAssignToReg<[X19, FP, X20, X21, X22, X23, X24, X25, X26, X27, X28]>> +]>; + +// The order of the callee-saves in this file is important, because the +// FrameLowering code will use this order to determine the layout the +// callee-save area in the stack frame. As can be observed below, Darwin +// requires the frame-record (LR, FP) to be at the top the callee-save area, +// whereas for other platforms they are at the bottom. + +// FIXME: LR is only callee-saved in the sense that *we* preserve it and are +// presumably a callee to someone. External functions may not do so, but this +// is currently safe since BL has LR as an implicit-def and what happens after a +// tail call doesn't matter. +// +// It would be better to model its preservation semantics properly (create a +// vreg on entry, use it in RET & tail call generation; make that vreg def if we +// end up saving LR as part of a call frame). Watch this space... +def CSR_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, + D8, D9, D10, D11, + D12, D13, D14, D15, + LR, FP)>; + +// A variant for treating X18 as callee saved, when interfacing with +// code that needs X18 to be preserved. +def CSR_AArch64_AAPCS_X18 : CalleeSavedRegs<(add X18, CSR_AArch64_AAPCS)>; + +// Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x. +// We put FP before LR, so that frame lowering logic generates (FP,LR) pairs, +// and not (LR,FP) pairs. +def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, FP, LR, + D8, D9, D10, D11, + D12, D13, D14, D15)>; + +// The Control Flow Guard check call uses a custom calling convention that also +// preserves X0-X8 and Q0-Q7. 
+def CSR_Win_AArch64_CFGuard_Check : CalleeSavedRegs<(add CSR_Win_AArch64_AAPCS, + (sequence "X%u", 0, 8), + (sequence "Q%u", 0, 7))>; + +// AArch64 PCS for vector functions (VPCS) +// must (additionally) preserve full Q8-Q23 registers +def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP, + (sequence "Q%u", 8, 23))>; + +// Functions taking SVE arguments or returning an SVE type +// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15 +def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add (sequence "Z%u", 8, 23), + (sequence "P%u", 4, 15), + X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP)>; + +def CSR_AArch64_AAPCS_SwiftTail + : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X20, X22)>; + +// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since +// 'this' and the pointer return value are both passed in X0 in these cases, +// this can be partially modelled by treating X0 as a callee-saved register; +// only the resulting RegMask is used; the SaveList is ignored +// +// (For generic ARM 64-bit ABI code, clang will not generate constructors or +// destructors with 'this' returns, so this RegMask will not be used in that +// case) +def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>; + +def CSR_AArch64_AAPCS_SwiftError + : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>; + +// The ELF stub used for TLS-descriptor access saves every feasible +// register. Only X0 and LR are clobbered. +def CSR_AArch64_TLS_ELF + : CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP, + (sequence "Q%u", 0, 31))>; + +def CSR_AArch64_AllRegs + : CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP, + (sequence "X%u", 0, 28), FP, LR, SP, + (sequence "B%u", 0, 31), (sequence "H%u", 0, 31), + (sequence "S%u", 0, 31), (sequence "D%u", 0, 31), + (sequence "Q%u", 0, 31))>; + +def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>; + +def CSR_AArch64_RT_MostRegs : CalleeSavedRegs<(add CSR_AArch64_AAPCS, + (sequence "X%u", 9, 15))>; + +def CSR_AArch64_StackProbe_Windows + : CalleeSavedRegs<(add (sequence "X%u", 0, 15), + (sequence "X%u", 18, 28), FP, SP, + (sequence "Q%u", 0, 31))>; + +// OHOS_LOCAL begin +def CSR_AArch64_ArkInt : CalleeSavedRegs<(add FP)>; + +def CSR_AArch64_ArkFast5 + : CalleeSavedRegs<(add (sub (sequence "X%u", 5, 27), X16, X17), LR, + (sequence "D%u", 0, 31))>; +def CSR_AArch64_ArkFast4 + : CalleeSavedRegs<(add CSR_AArch64_ArkFast5, X4)>; + +def CSR_AArch64_ArkFast3 + : CalleeSavedRegs<(add CSR_AArch64_ArkFast4, X3)>; + +def CSR_AArch64_ArkFast2 + : CalleeSavedRegs<(add CSR_AArch64_ArkFast3, X2)>; + +def CSR_AArch64_ArkFast1 + : CalleeSavedRegs<(add CSR_AArch64_ArkFast2, X1)>; + +def CSR_AArch64_ArkFast0 + : CalleeSavedRegs<(add CSR_AArch64_ArkFast1, X0)>; + +def CSR_AArch64_ArkMethod : CalleeSavedRegs<(add LR, FP)>; +// OHOS_LOCAL end + +// Darwin variants of AAPCS. +// Darwin puts the frame-record at the top of the callee-save area. 
+def CSR_Darwin_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, + X23, X24, X25, X26, X27, X28, + D8, D9, D10, D11, + D12, D13, D14, D15)>; + +def CSR_Darwin_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, + X22, X23, X24, X25, X26, X27, + X28, (sequence "Q%u", 8, 23))>; +def CSR_Darwin_AArch64_AAPCS_ThisReturn + : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, X0)>; + +def CSR_Darwin_AArch64_AAPCS_SwiftError + : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>; + +def CSR_Darwin_AArch64_AAPCS_SwiftTail + : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X20, X22)>; + +// The function used by Darwin to obtain the address of a thread-local variable +// guarantees more than a normal AAPCS function. x16 and x17 are used on the +// fast path for calculation, but other registers except X0 (argument/return) +// and LR (it is a call, after all) are preserved. +def CSR_Darwin_AArch64_TLS + : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17), + FP, + (sequence "Q%u", 0, 31))>; + +// We can only handle a register pair with adjacent registers, the register pair +// should belong to the same class as well. Since the access function on the +// fast path calls a function that follows CSR_Darwin_AArch64_TLS, +// CSR_Darwin_AArch64_CXX_TLS should be a subset of CSR_Darwin_AArch64_TLS. +def CSR_Darwin_AArch64_CXX_TLS + : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, + (sub (sequence "X%u", 1, 28), X9, X15, X16, X17, X18, X19), + (sequence "D%u", 0, 31))>; + +// CSRs that are handled by prologue, epilogue. +def CSR_Darwin_AArch64_CXX_TLS_PE + : CalleeSavedRegs<(add LR, FP)>; + +// CSRs that are handled explicitly via copies. +def CSR_Darwin_AArch64_CXX_TLS_ViaCopy + : CalleeSavedRegs<(sub CSR_Darwin_AArch64_CXX_TLS, LR, FP)>; + +def CSR_Darwin_AArch64_RT_MostRegs + : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, (sequence "X%u", 9, 15))>; + +// Variants of the standard calling conventions for shadow call stack. +// These all preserve x18 in addition to any other registers. 
+def CSR_AArch64_NoRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_NoRegs, X18)>; +def CSR_AArch64_AllRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_AllRegs, X18)>; +def CSR_AArch64_AAPCS_SwiftError_SCS + : CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>; +def CSR_AArch64_RT_MostRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>; +def CSR_AArch64_AAVPCS_SCS + : CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>; +def CSR_AArch64_SVE_AAPCS_SCS + : CalleeSavedRegs<(add CSR_AArch64_SVE_AAPCS, X18)>; +def CSR_AArch64_AAPCS_SCS + : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index fa4bbadd0995fcc0dfe276d4095ee82bd8385c92..8856fe0ea5554f1bd17287b66a852da6b0b726fd 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1403,6 +1403,18 @@ static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII, } } +#ifdef ARK_GC_SUPPORT +Triple::ArchType AArch64FrameLowering::GetArkSupportTarget() const +{ + return Triple::aarch64; +} + +int AArch64FrameLowering::GetFixedFpPosition() const +{ + return -1; +} +#endif + void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -1501,8 +1513,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. + #ifndef ARK_GC_SUPPORT if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; + #endif + // asm-int GHC call webkit function, we need push regs to stack. // OHOS_LOCAL begin if (HasFP && (MF.getFunction().getCallingConv() == CallingConv::ArkFast0 || @@ -1970,8 +1985,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. + #ifndef ARK_GC_SUPPORT if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; + #endif + // asm-int GHC call webkit function, we need push regs to stack. // How much of the stack used by incoming arguments this function is expected // to restore in this particular epilogue. @@ -2995,8 +3013,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, RegScavenger *RS) const { // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. + #ifndef ARK_GC_SUPPORT if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; + #endif + // asm-int GHC call webkit function, we need push regs to stack. TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); const AArch64RegisterInfo *RegInfo = static_cast( diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 8a3dbefb124ed88eedea584b345f32cb48d5bd41..221f92856c5573367ba6d3abc4a2a5b667e2c9d9 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -16,6 +16,9 @@ #include "AArch64StackProtectorRetLowering.h" // OHOS_LOCAL #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Support/TypeSize.h" +#ifdef ARK_GC_SUPPORT +#include "llvm/ADT/Triple.h" +#endif namespace llvm { @@ -42,6 +45,10 @@ public: /// the function. 
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; +#ifdef ARK_GC_SUPPORT + Triple::ArchType GetArkSupportTarget() const override; + int GetFixedFpPosition() const override; +#endif const StackProtectorRetLowering *getStackProtectorRet() const override; // OHOS_LOCAL diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7c5b5251aed33e833d999ed0053cf90fb74733aa..283da2a3dcf5ec81b5a0ab92942030ed8edc4ac3 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6239,8 +6239,13 @@ SDValue AArch64TargetLowering::LowerCallResult( /// Return true if the calling convention is one that we can guarantee TCO for. static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { +#ifdef ARK_GC_SUPPORT + return ((CC == CallingConv::GHC || CC == CallingConv::Fast) && GuaranteeTailCalls) || + CC == CallingConv::Tail || CC == CallingConv::SwiftTail; +#else return (CC == CallingConv::Fast && GuaranteeTailCalls) || CC == CallingConv::Tail || CC == CallingConv::SwiftTail; +#endif } /// Return true if we might ever do TCO for calls with this calling convention. @@ -6491,8 +6496,13 @@ SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const { +#ifdef ARK_GC_SUPPORT + return ((CallCC == CallingConv::GHC || CallCC == CallingConv::Fast) && TailCallOpt) || + CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail; +#else return (CallCC == CallingConv::Fast && TailCallOpt) || CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail; +#endif } // Check if the value is zero-extended from i1 to i8 diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 95294679f3eed1a5a4a3a03af67eacd1b7c1565b..49c45cec1e911469029559c5a30c6f656d14a7b5 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -354,6 +354,17 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (TFI->hasFP(MF) || TT.isOSDarwin()) markSuperRegs(Reserved, AArch64::W29); +#ifdef ARK_GC_SUPPORT + if (MF.getFunction().getCallingConv() == CallingConv::GHC) { + markSuperRegs(Reserved, AArch64::W29); + markSuperRegs(Reserved, AArch64::W30); + } + if ((MF.getFunction().getCallingConv() == CallingConv::WebKit_JS) || + (MF.getFunction().getCallingConv() == CallingConv::C)) { + markSuperRegs(Reserved, AArch64::W30); + } +#endif + for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) { if (MF.getSubtarget().isXRegisterReserved(i)) markSuperRegs(Reserved, AArch64::GPR32commonRegClass.getRegister(i)); diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt index ecf8be3c8503dc7f9e716994ed980cbd5ac7d41e..e3d4d5494fa90b28971936de2b954be811c19cd4 100644 --- a/llvm/lib/Target/AArch64/CMakeLists.txt +++ b/llvm/lib/Target/AArch64/CMakeLists.txt @@ -1,7 +1,10 @@ add_llvm_component_group(AArch64 HAS_JIT) -set(LLVM_TARGET_DEFINITIONS AArch64.td) - +if(BUILD_ARK_GC_SUPPORT) + set(LLVM_TARGET_DEFINITIONS AArch64ArkGc.td) +else() + set(LLVM_TARGET_DEFINITIONS AArch64.td) +endif(BUILD_ARK_GC_SUPPORT) tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer) tablegen(LLVM 
AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 48277a8504115d0c5d02c27c095d8db18fb2453f..62bf5f439fe6f8059e23bd9193aff7d662255d9f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -341,8 +341,13 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
 } // namespace
 
 static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
+#ifdef ARK_GC_SUPPORT
+  return ((CallConv == CallingConv::GHC || CallConv == CallingConv::Fast) && TailCallOpt) ||
+         CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
+#else
   return (CallConv == CallingConv::Fast && TailCallOpt) ||
          CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
+#endif
 }
 
 bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
@@ -662,8 +667,13 @@ bool AArch64CallLowering::lowerFormalArguments(
 
 /// Return true if the calling convention is one that we can guarantee TCO for.
 static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
+#ifdef ARK_GC_SUPPORT
+  return ((CC == CallingConv::GHC || CC == CallingConv::Fast) && GuaranteeTailCalls) ||
+         CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
+#else
   return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
          CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
+#endif
 }
 
 /// Return true if we might ever do TCO for calls with this calling convention.
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 58b60c19448721332468da138073efc6281b3718..0267b00aefba2dc5226447c1d77cc3b6d1179c31 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -1481,6 +1481,37 @@ bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
    - for 32-bit code, substitute %e?? registers for %r??
 */
 
+#ifdef ARK_GC_SUPPORT
+Triple::ArchType X86FrameLowering::GetArkSupportTarget() const
+{
+  return Is64Bit ? Triple::x86_64 : Triple::x86;
+}
+
+int X86FrameLowering::GetFixedFpPosition() const
+{
+  return 2;
+}
+
+int X86FrameLowering::GetFrameReserveSize(MachineFunction &MF) const
+{
+  int slotSize = sizeof(uint64_t);
+  if (!Is64Bit) {
+    slotSize = sizeof(uint32_t);
+  }
+  int reserveSize = 0;
+  MF.getFunction()
+      .getFnAttribute("frame-reserved-slots")
+      .getValueAsString()
+      .getAsInteger(10, reserveSize);
+
+  // On x86-64 the reserved size must be 16-byte aligned.
+  if (Is64Bit) {
+    return RoundUp(reserveSize, 2 * sizeof(uint64_t));
+  }
+  return reserveSize;
+}
+#endif
+
 void X86FrameLowering::emitPrologue(MachineFunction &MF,
                                     MachineBasicBlock &MBB) const {
   assert(&STI == &MF.getSubtarget() &&
@@ -1760,6 +1791,20 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
     else
       MFI.setOffsetAdjustment(-StackSize);
   }
+#ifdef ARK_GC_SUPPORT
+  // Reserve the stack space requested via the "frame-reserved-slots" attribute.
+  if (MF.getFunction().hasFnAttribute("frame-reserved-slots"))
+  {
+    unsigned StackPtr = TRI->getStackRegister();
+    int reserveSize = GetFrameReserveSize(MF);
+    const unsigned SUBOpc =
+        getSUBriOpcode(Uses64BitFramePtr, reserveSize);
+    BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr)
+        .addReg(StackPtr)
+        .addImm(reserveSize)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+#endif
 
   // For EH funclets, only allocate enough space for outgoing calls. Save the
   // NumBytes value that we would've used for the parent frame.
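For reference, the reserve-size computation used by the X86 hunks above reduces to a few lines of arithmetic: the "frame-reserved-slots" attribute carries a decimal byte count, and on x86-64 the prologue rounds it up to 16 bytes before subtracting it from the stack pointer. The following is a standalone sketch of that logic, not code from the patch; the helper names and sample values are illustrative only.

```cpp
// Standalone sketch (not patch code) of the reserve-size arithmetic.
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <string>

// Round x up to a multiple of n (n must be a power of two).
constexpr uint64_t roundUp(uint64_t x, uint64_t n) {
  return (x + n - 1) & ~(n - 1);
}

// Parse the decimal "frame-reserved-slots" value and, on 64-bit targets,
// round it up to 16 bytes as the patched X86 frame lowering does.
uint64_t frameReserveSize(const std::string &attrValue, bool is64Bit) {
  const uint64_t requested = std::strtoull(attrValue.c_str(), nullptr, 10);
  return is64Bit ? roundUp(requested, 2 * sizeof(uint64_t)) : requested;
}

int main() {
  std::cout << frameReserveSize("8", /*is64Bit=*/true) << "\n";   // 16
  std::cout << frameReserveSize("8", /*is64Bit=*/false) << "\n";  // 8
  std::cout << frameReserveSize("24", /*is64Bit=*/true) << "\n";  // 32
}
```

A frontend targeting this patch would presumably attach the attribute with Function::addFnAttr("frame-reserved-slots", "<bytes>") before code generation, which is what getFnAttribute(...).getValueAsString() reads back here.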
@@ -2226,6 +2271,22 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // AfterPop is the position to insert .cfi_restore. MachineBasicBlock::iterator AfterPop = MBBI; if (HasFP) { +#ifdef ARK_GC_SUPPORT + if (MF.getFunction().hasFnAttribute("frame-reserved-slots")) + { + + int reserveSize = GetFrameReserveSize(MF); + int slotSize = sizeof(uint32_t); + if (Is64Bit) { + slotSize = sizeof(uint64_t); + } + for (int i = 0; i < reserveSize / slotSize; i++) { + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), + MachineFramePtr) + .setMIFlag(MachineInstr::FrameDestroy); + } + } +#endif if (X86FI->hasSwiftAsyncContext()) { // Discard the context. int Offset = 16 + mergeSPUpdates(MBB, MBBI, true); @@ -2650,6 +2711,12 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( } } +#ifdef ARK_GC_SUPPORT + int reserveSize = GetFrameReserveSize(MF); + SpillSlotOffset -= reserveSize; // skip frame reserved + CalleeSavedFrameSize += reserveSize; +#endif + // Assign slots for GPRs. It increases frame size. for (CalleeSavedInfo &I : llvm::reverse(CSI)) { Register Reg = I.getReg(); diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h index e6a6241fdda039fa6506871328f8e9a293fae8ba..9783c7ca2d358cce6a48864fdde196ba5a38e776 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -74,6 +74,11 @@ public: /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. +#ifdef ARK_GC_SUPPORT + Triple::ArchType GetArkSupportTarget() const override; + int GetFixedFpPosition() const override; + int GetFrameReserveSize(MachineFunction &MF) const override; +#endif void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a75ee58ad7f75ebaa0df7aaae7b8c877a48bbb6f..c22915ce3fbf852854fa2b9ba253d0846f1f1ea3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4400,8 +4400,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, report_fatal_error("failed to perform tail call elimination on a call " "site marked musttail"); +#ifdef ARK_GC_SUPPORT + assert(!(isVarArg && canGuaranteeTCO(CallConv) && (CallConv != CallingConv::GHC)) && + "Var args not supported with calling convention fastcc, ghc or hipe"); +#else assert(!(isVarArg && canGuaranteeTCO(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"); +#endif // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 381901e74d9f7d4d5408970d7e4a79c0e37a299a..f6f92bddeff6f9cc50f0c3b7df6ed198cd8a8484 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -611,6 +611,12 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP)) Reserved.set(SubReg); } +#ifdef ARK_GC_SUPPORT + if (MF.getFunction().getCallingConv() == CallingConv::GHC) { + for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP)) + Reserved.set(SubReg); + } +#endif // Set the base-pointer register and its aliases as reserved if needed. if (hasBasePointer(MF)) {
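The epilogue side of the same mechanism (the POP loop added to emitEpilogue above) has to undo exactly what the prologue's SUB reserved. A quick standalone check, again illustrative rather than patch code: the epilogue emits reserve / slotSize POPs, so as long as the reserve is a multiple of the slot size (which the 16-byte rounding guarantees on x86-64), the stack pointer ends up fully restored.

```cpp
// Illustrative balance check for the reserved-slot prologue/epilogue pair.
#include <cstdint>
#include <iostream>

int main() {
  for (bool is64Bit : {true, false}) {
    const uint64_t slotSize = is64Bit ? 8 : 4;   // width of one POP
    const uint64_t requested = 8;                // sample "frame-reserved-slots" value
    // Reserve as the prologue computes it: rounded to 16 bytes on x86-64.
    const uint64_t reserve =
        is64Bit ? ((requested + 15) & ~uint64_t(15)) : requested;
    const uint64_t pops = reserve / slotSize;    // POPs emitted by the epilogue
    std::cout << (is64Bit ? "x86-64" : "x86") << ": SUB " << reserve
              << " bytes in the prologue, " << pops << " POPs ("
              << pops * slotSize << " bytes) in the epilogue\n";
  }
}
```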