diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 3d6f5e6f9d3dae03274317937162125faa537f71..2bbdfcae6ae8fed17ad69c324adc53804899f10f 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -640,6 +640,11 @@ option(LLVM_BUILD_RUNTIME "Build the LLVM runtime libraries." ON) option(LLVM_BUILD_EXAMPLES "Build the LLVM example programs. If OFF, just generate build targets." OFF) +option(BUILD_ARK_GC_SUPPORT + "ARK support GC. If ON, support GC." OFF) +if(BUILD_ARK_GC_SUPPORT) + add_definitions(-DARK_GC_SUPPORT) +endif(BUILD_ARK_GC_SUPPORT) option(LLVM_INCLUDE_EXAMPLES "Generate build targets for the LLVM examples" ON) if(LLVM_BUILD_EXAMPLES) diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index bb9e872b6ec5ca385c659998d363c63a789753ae..dc54d265f4907ee2bfff94a185e97636db45f951 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -4034,6 +4034,12 @@ LLVM_ATTRIBUTE_C_DEPRECATED( LLVMValueRef LLVMBuildCall2(LLVMBuilderRef, LLVMTypeRef, LLVMValueRef Fn, LLVMValueRef *Args, unsigned NumArgs, const char *Name); +#ifdef ARK_GC_SUPPORT +LLVMValueRef LLVMBuildCall3(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn, + LLVMValueRef *Args, unsigned NumArgs, + const char *Name, LLVMValueRef *deoptVals, + int NumVals); +#endif LLVMValueRef LLVMBuildSelect(LLVMBuilderRef, LLVMValueRef If, LLVMValueRef Then, LLVMValueRef Else, const char *Name); diff --git a/llvm/include/llvm-c/ExecutionEngine.h b/llvm/include/llvm-c/ExecutionEngine.h index c5fc9bdb4d07f62462c65924e6ae8faf75748dec..ccd4e5164165ef040e3ff07957d79fa2d2877ce9 100644 --- a/llvm/include/llvm-c/ExecutionEngine.h +++ b/llvm/include/llvm-c/ExecutionEngine.h @@ -42,6 +42,9 @@ typedef struct LLVMOpaqueMCJITMemoryManager *LLVMMCJITMemoryManagerRef; struct LLVMMCJITCompilerOptions { unsigned OptLevel; +#ifdef ARK_GC_SUPPORT + LLVMRelocMode RelMode; +#endif LLVMCodeModel CodeModel; LLVMBool NoFramePointerElim; LLVMBool EnableFastISel; diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index 0e0d6b6292d770ec7cf587652baf727d21cd811e..9bb467a367338efe1a5e00990ef902e39bca1ba1 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -18,6 +18,9 @@ #include "llvm/Support/TypeSize.h" #include "llvm/IR/CallingConv.h" // OHOS_LOCAL #include +#ifdef ARK_GC_SUPPORT +#include "llvm/ADT/Triple.h" +#endif namespace llvm { class BitVector; @@ -222,6 +225,26 @@ public: /// emitZeroCallUsedRegs - Zeros out call used registers. virtual void emitZeroCallUsedRegs(BitVector RegsToZero, MachineBasicBlock &MBB) const {} + #ifdef ARK_GC_SUPPORT + template + constexpr T RoundUp(T x, size_t n) const + { + static_assert(std::is_integral::value, "T must be integral"); + return (static_cast(x) + n - 1U) & (-n); + } + + virtual Triple::ArchType GetArkSupportTarget() const + { + return Triple::UnknownArch; + } + + virtual int GetFixedFpPosition() const + { + return 2; + } + + virtual int GetFrameReserveSize(MachineFunction &MF) const; + #endif /// OHOS_LOCAL begin /// Instances about backward cfi and stack protection provided by different architectures. 
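The hunks above only declare the new ARK-specific C-API surface (the LLVMBuildCall3 entry point in llvm-c/Core.h and the RelMode field in LLVMMCJITCompilerOptions). As a rough, non-authoritative sketch of how a client might drive them, assuming the tree was configured with -DBUILD_ARK_GC_SUPPORT=ON and that the helper names (buildDeoptCall, createJITWithPIC) and the surrounding builder/module/callee values are illustrative rather than part of the patch:

/* Sketch only: assumes ARK_GC_SUPPORT was enabled at configure time, so the
 * declarations added by this patch are visible in the llvm-c headers. */
#include <llvm-c/Core.h>
#include <llvm-c/ExecutionEngine.h>

static LLVMValueRef buildDeoptCall(LLVMBuilderRef b, LLVMTypeRef fnTy,
                                   LLVMValueRef callee, LLVMValueRef *args,
                                   unsigned numArgs, LLVMValueRef *deoptVals,
                                   int numDeoptVals) {
  /* Like LLVMBuildCall2, but the extra values are attached to the call as a
   * "deopt" operand bundle so the statepoint/stackmap machinery can record
   * them for the GC. */
  return LLVMBuildCall3(b, fnTy, callee, args, numArgs, "call",
                        deoptVals, numDeoptVals);
}

static LLVMBool createJITWithPIC(LLVMExecutionEngineRef *jit, LLVMModuleRef m,
                                 char **err) {
  struct LLVMMCJITCompilerOptions opts;
  LLVMInitializeMCJITCompilerOptions(&opts, sizeof(opts));
  opts.OptLevel = 2;
  opts.RelMode = LLVMRelocPIC; /* field added by this patch */
  return LLVMCreateMCJITCompilerForModule(jit, m, &opts, sizeof(opts), err);
}

The remaining hunks below wire these declarations through to the implementation side (Core.cpp, ExecutionEngineBindings.cpp) and extend the prologue/epilogue and frame-lowering code.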
diff --git a/llvm/include/llvm/IR/LegacyPassManager.h b/llvm/include/llvm/IR/LegacyPassManager.h index b3a4820ba0e492b10bf3751688212ef1f7d155b8..2d82c2561cebbbaaf4252758fa261a5333fbb957 100644 --- a/llvm/include/llvm/IR/LegacyPassManager.h +++ b/llvm/include/llvm/IR/LegacyPassManager.h @@ -16,6 +16,9 @@ #ifndef LLVM_IR_LEGACYPASSMANAGER_H #define LLVM_IR_LEGACYPASSMANAGER_H +#ifdef ARK_GC_SUPPORT +#include "llvm/Pass.h" +#endif #include "llvm/Support/CBindingWrapping.h" namespace llvm { diff --git a/llvm/include/llvm/Target/CodeGenCWrappers.h b/llvm/include/llvm/Target/CodeGenCWrappers.h index a995463570535d04ccb0c378639c076760b88c73..5929c7efe2126d35ce3b07117042c4745e5d96cb 100644 --- a/llvm/include/llvm/Target/CodeGenCWrappers.h +++ b/llvm/include/llvm/Target/CodeGenCWrappers.h @@ -59,6 +59,37 @@ inline LLVMCodeModel wrap(CodeModel::Model Model) { } llvm_unreachable("Bad CodeModel!"); } + +#ifdef ARK_GC_SUPPORT +inline Reloc::Model unwrap(LLVMRelocMode Model) { + switch (Model) { + case LLVMRelocDefault: + case LLVMRelocStatic: + return Reloc::Static; + case LLVMRelocPIC: + return Reloc::PIC_; + case LLVMRelocDynamicNoPic: + return Reloc::DynamicNoPIC; + } + llvm_unreachable("Invalid LLVMRelocMode!"); +} + +inline LLVMRelocMode unwrap(Reloc::Model Model) { + switch (Model) { + case Reloc::Static: + return LLVMRelocStatic; + case Reloc::PIC_: + return LLVMRelocPIC; + case Reloc::DynamicNoPIC: + return LLVMRelocDynamicNoPic; + case Reloc::ROPI: + case Reloc::RWPI: + case Reloc::ROPI_RWPI: + break; + } + llvm_unreachable("Invalid Reloc::Model!"); +} +#endif } // namespace llvm #endif diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index b1caeefab81dcb0ae25fa9b294146dc9786b586a..1604865e3e1e2d4b1b660ed5a2bab76a6a878823 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -68,6 +68,11 @@ #include #include +#ifdef ARK_GC_SUPPORT +#include +#include +#endif + using namespace llvm; #define DEBUG_TYPE "prologepilog" @@ -121,6 +126,9 @@ private: void calculateCallFrameInfo(MachineFunction &MF); void calculateSaveRestoreBlocks(MachineFunction &MF); +#ifdef ARK_GC_SUPPORT + void RecordCalleeSaveRegisterAndOffset(MachineFunction &MF, const std::vector &CSI); +#endif void spillCalleeSavedRegs(MachineFunction &MF); void calculateFrameObjectOffsets(MachineFunction &MF); @@ -314,6 +322,10 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { RestoreBlocks.clear(); MFI.setSavePoint(nullptr); MFI.setRestorePoint(nullptr); +#ifdef ARK_GC_SUPPORT + std::vector &CSI = MFI.getCalleeSavedInfo(); + RecordCalleeSaveRegisterAndOffset(MF, CSI); +#endif return true; } @@ -649,6 +661,69 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, } } +#ifdef ARK_GC_SUPPORT +void PEI::RecordCalleeSaveRegisterAndOffset(MachineFunction &MF, const std::vector &CSI) +{ + MachineModuleInfo &MMI = MF.getMMI(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); + Function &func = const_cast(MF.getFunction()); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + Triple::ArchType archType = TFI->GetArkSupportTarget(); + + if ((archType != Triple::aarch64 && archType != Triple::x86_64) || !(TFI->hasFP(MF))) { + return; + } + unsigned FpRegDwarfNum = 0; + if (archType == Triple::aarch64) { + FpRegDwarfNum = 29; // x29 + } else { + FpRegDwarfNum = 6; //rbp + } + int64_t FpOffset = 0; + int64_t deleta; + // nearest to rbp callee register + 
int64_t maxOffset = INT_MIN; + for (auto I : CSI) { + int64_t Offset = MFI.getObjectOffset(I.getFrameIdx()); + unsigned Reg = I.getReg(); + unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, true); + if (FpRegDwarfNum == DwarfRegNum) { + FpOffset = Offset; + } + maxOffset = std::max(Offset, maxOffset); + } + if (archType == Triple::x86_64) { + // rbp not existed in CSI + int64_t reseversize = TFI->GetFrameReserveSize(MF) + sizeof(uint64_t); // 1: rbp + deleta = maxOffset + reseversize; // nearest to rbp offset + } else { + deleta = FpOffset; + } + + const unsigned LinkRegDwarfNum = 30; + for (std::vector::const_iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); + unsigned Reg = I->getReg(); + unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, true); + if ((DwarfRegNum == LinkRegDwarfNum || DwarfRegNum == FpRegDwarfNum) + && (archType == Triple::aarch64)) { + continue; + } + Offset = Offset - deleta; + std::string key = std::string("DwarfReg") + std::to_string(DwarfRegNum); + std::string value = std::to_string(Offset); + LLVM_DEBUG(dbgs() << "RecordCalleeSaveRegisterAndOffset DwarfRegNum :" + << DwarfRegNum << " key:" << key + << " value:" << value + << "]\n"); + Attribute attr = Attribute::get(func.getContext(), key.c_str(), value.c_str()); + func.addAttributeAtIndex(AttributeList::FunctionIndex, attr); + } +} +#endif + void PEI::spillCalleeSavedRegs(MachineFunction &MF) { // We can't list this requirement in getRequiredProperties because some // targets (WebAssembly) use virtual registers past this point, and the pass @@ -928,6 +1003,88 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // stack area. int64_t FixedCSEnd = Offset; +#ifdef ARK_GC_SUPPORT + int CalleeSavedFrameSize = 0; + Triple::ArchType archType = TFI.GetArkSupportTarget(); + if (archType == Triple::aarch64 && TFI.hasFP(MF)) { + int fpPosition = TFI.GetFixedFpPosition(); + int slotSize = sizeof(uint64_t); + int fpToCallerSpDelta = 0; + // 0:not exist +:count from head -:count from tail + // for x86-64 + // +--------------------------+ + // | caller Frame | + // +--------------------------+--- + // | returnAddr | ^ + // +--------------------------+ 2 slot(fpToCallerSpDelta) + // | Fp | V fpPosition = 2 + // +--------------------------+--- + // | type | + // +--------------------------+ + // | ReServeSize | + // +--------------------------+ + // | R14 | + // +--------------------------+ + // | R13 | + // +--------------------------+ + // | R12 | + // +--------------------------+ + // | RBX | + // +--------------------------+ + // for ARM64 + // +--------------------------+ + // | caller Frame | + // +--------------------------+--- + // | callee save registers | ^ + // | (exclude Fp) | | + // | | callee save registers size(fpToCallerSpDelta) + // +--------------------------+ | + // | Fp | V fpPosition = -1 + // +--------------------------+--- FixedCSEnd + // | type | + // +--------------------------+ + // | ReServeSize | + // +--------------------------+ + if (fpPosition >= 0) { + fpToCallerSpDelta = fpPosition * slotSize; + } else { + fpToCallerSpDelta = FixedCSEnd + (fpPosition + 1) * slotSize; + } + Function &func = const_cast(MF.getFunction()); + Attribute attr = Attribute::get(func.getContext(), "fpToCallerSpDelta", std::to_string(fpToCallerSpDelta).c_str()); + func.addAttributeAtIndex(AttributeList::FunctionIndex, attr); + + CalleeSavedFrameSize = TFI.GetFrameReserveSize(MF); + Offset += CalleeSavedFrameSize; + } + + if ((archType == Triple::x86_64) 
&& TFI.hasFP(MF)) { + // Determine which of the registers in the callee save list should be saved. + int fpPosition = TFI.GetFixedFpPosition(); + int fpToCallerSpDelta = 0; + int slotSize = sizeof(uint64_t); + if (fpPosition >= 0) { + fpToCallerSpDelta = fpPosition * slotSize; + } else { + fpToCallerSpDelta = FixedCSEnd + (fpPosition + 1) * slotSize; + } + Function &func = const_cast(MF.getFunction()); + Attribute attr = Attribute::get(func.getContext(), "fpToCallerSpDelta", std::to_string(fpToCallerSpDelta).c_str()); + func.addAttributeAtIndex(AttributeList::FunctionIndex, attr); + + CalleeSavedFrameSize = TFI.GetFrameReserveSize(MF); + std::vector &CSI = MFI.getCalleeSavedInfo(); + LLVM_DEBUG(dbgs() << " CSI size: " << CSI.size() << " CalleeSavedFrameSize " << CalleeSavedFrameSize << "\n"); + // if callee-saved is empty, the reserved-size can't be passed to the computation of local zone + // because the assignCalleeSavedSpillSlots() directly return. + // Otherwise, the reserved-size don't need to add to the computation of local zone because it has been considered + // while computing the offsets of callee-saved-zone that will be passed to the computation of local-zone + if (CSI.empty()) { + Offset += CalleeSavedFrameSize; + } + } +#endif + // Make sure the special register scavenging spill slot is closest to the // incoming stack pointer if a frame pointer is required and is closer // to the incoming rather than the final stack pointer. diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp index ccaff862fa3f3852e372c1c8f661293e226ee828..9254f3af5add64cde34d188804ddda4e575589f9 100644 --- a/llvm/lib/CodeGen/StackMaps.cpp +++ b/llvm/lib/CodeGen/StackMaps.cpp @@ -29,6 +29,9 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#ifdef ARK_GC_SUPPORT +#include "llvm/Target/TargetMachine.h" +#endif #include #include #include @@ -599,10 +602,11 @@ void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) { // Function Frame records. 
LLVM_DEBUG(dbgs() << WSMP << "functions:\n"); for (auto const &FR : FnInfos) { - LLVM_DEBUG(dbgs() << WSMP << "function addr: " << FR.first - << " frame size: " << FR.second.StackSize - << " callsite count: " << FR.second.RecordCount << '\n'); - OS.emitSymbolValue(FR.first, 8); + #ifdef ARK_GC_SUPPORT + OS.emitSymbolValue(FR.first, AP.TM.getProgramPointerSize()); + #else + OS.emitSymbolValue(FR.first, 8); + #endif OS.emitIntValue(FR.second.StackSize, 8); OS.emitIntValue(FR.second.RecordCount, 8); } diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index 0007c44b859ca2c94c3ac8c79b8bcb723e71e890..0e026bdf42f12e902b26ca7c0a53d4a9bf7a3d8e 100644 --- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -179,3 +179,17 @@ TargetFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const { const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); return DwarfFrameBase{DwarfFrameBase::Register, {RI->getFrameRegister(MF)}}; } + +#ifdef ARK_GC_SUPPORT +int TargetFrameLowering::GetFrameReserveSize(MachineFunction &MF) const +{ + int slotSize = sizeof(uint64_t); + int64_t marker = 0x0; + int reserveSize = 0; + MF.getFunction() + .getFnAttribute("frame-reserved-slots") + .getValueAsString() + .getAsInteger(10, marker); + return marker; +} +#endif diff --git a/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp index 672fd7b991c25a6d144566324d485d156942a748..b4bbadc7f5e50a100b9168fe9d55619748a3f444 100644 --- a/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -197,6 +197,9 @@ LLVMBool LLVMCreateMCJITCompilerForModule( builder.setEngineKind(EngineKind::JIT) .setErrorStr(&Error) .setOptLevel((CodeGenOpt::Level)options.OptLevel) +#ifdef ARK_GC_SUPPORT + .setRelocationModel(unwrap(options.RelMode)) +#endif .setTargetOptions(targetOptions); bool JIT; if (Optional CM = unwrap(options.CodeModel, JIT)) diff --git a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp index f1eeee3b3599d236c90d1783813f4eca3d78dfc1..b4ef229dcdb514a8327b0d1d046bf694e1903a92 100644 --- a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp +++ b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp @@ -54,6 +54,22 @@ extern "C" { } namespace { +#ifdef ARK_GC_SUPPORT +// We put information about the JITed function in this global, which the +// debugger reads. Make sure to specify the version statically, because the +// debugger checks the version before we can set it during runtime. +struct jit_descriptor __jit_debug_descriptor = {1, 0, nullptr, nullptr}; + +// Debuggers that implement the GDB JIT interface put a special breakpoint in +// this function. +LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() { + // The noinline and the asm prevent calls to this function from being + // optimized out. +#if !defined(_MSC_VER) + asm volatile("" ::: "memory"); +#endif +#endif +} // FIXME: lli aims to provide both, RuntimeDyld and JITLink, as the dynamic // loaders for it's JIT implementations. 
And they both offer debugging via the diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index 08b7b0e1f9560519600776016b6cbf8ec8ce85a4..1369e572cb663aad0198c952d76574a3d2221e11 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -3920,6 +3920,25 @@ LLVMValueRef LLVMBuildCall2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn, makeArrayRef(unwrap(Args), NumArgs), Name)); } +#ifdef ARK_GC_SUPPORT +LLVMValueRef LLVMBuildCall3(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn, + LLVMValueRef *Args, unsigned NumArgs, + const char *Name, LLVMValueRef *deoptVals, + int NumVals) { + FunctionType *FTy = unwrap(Ty); + std::vector vals; + for (int i = 0; i < NumVals; i++) { + vals.push_back(unwrap(deoptVals[i])); + } + OperandBundleDefT deoptBundle("deopt", vals); + + return wrap(unwrap(B)->CreateCall(FTy, unwrap(Fn), + makeArrayRef(unwrap(Args), NumArgs), // Args + {deoptBundle}, // ArrayRef + Name)); +} +#endif + LLVMValueRef LLVMBuildSelect(LLVMBuilderRef B, LLVMValueRef If, LLVMValueRef Then, LLVMValueRef Else, const char *Name) { diff --git a/llvm/lib/Target/AArch64/AArch64ArkGc.td b/llvm/lib/Target/AArch64/AArch64ArkGc.td new file mode 100644 index 0000000000000000000000000000000000000000..ccd982d4491930175e06e567b3ee8b33f527ec8e --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64ArkGc.td @@ -0,0 +1,1263 @@ +//=- AArch64ArkGc.td - Describe the AArch64 Target Machine --------*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing. +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// AArch64 Subtarget features. +// + +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", + "Enable ARMv8 FP">; + +def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", + "Enable Advanced SIMD instructions", [FeatureFPARMv8]>; + +def FeatureSM4 : SubtargetFeature< + "sm4", "HasSM4", "true", + "Enable SM3 and SM4 support", [FeatureNEON]>; + +def FeatureSHA2 : SubtargetFeature< + "sha2", "HasSHA2", "true", + "Enable SHA1 and SHA256 support", [FeatureNEON]>; + +def FeatureSHA3 : SubtargetFeature< + "sha3", "HasSHA3", "true", + "Enable SHA512 and SHA3 support", [FeatureNEON, FeatureSHA2]>; + +def FeatureAES : SubtargetFeature< + "aes", "HasAES", "true", + "Enable AES support", [FeatureNEON]>; + +// Crypto has been split up and any combination is now valid (see the +// crypto definitions above). Also, crypto is now context sensitive: +// it has a different meaning for e.g. Armv8.4 than it has for Armv8.2. +// Therefore, we rely on Clang, the user interacing tool, to pass on the +// appropriate crypto options. But here in the backend, crypto has very little +// meaning anymore. We kept the Crypto definition here for backward +// compatibility, and now imply features SHA2 and AES, which was the +// "traditional" meaning of Crypto. 
+def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable cryptographic instructions", [FeatureNEON, FeatureSHA2, FeatureAES]>; + +def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", + "Enable ARMv8 CRC-32 checksum instructions">; + +def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", + "Enable ARMv8 Reliability, Availability and Serviceability Extensions">; + +def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true", + "Enable ARMv8.1 Large System Extension (LSE) atomic instructions">; + +def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true", + "Enable out of line atomics to support LSE instructions">; + +def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true", + "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">; + +def FeaturePAN : SubtargetFeature< + "pan", "HasPAN", "true", + "Enables ARM v8.1 Privileged Access-Never extension">; + +def FeatureLOR : SubtargetFeature< + "lor", "HasLOR", "true", + "Enables ARM v8.1 Limited Ordering Regions extension">; + +def FeatureCONTEXTIDREL2 : SubtargetFeature<"CONTEXTIDREL2", "HasCONTEXTIDREL2", + "true", "Enable RW operand CONTEXTIDR_EL2" >; + +def FeatureVH : SubtargetFeature<"vh", "HasVH", "true", + "Enables ARM v8.1 Virtual Host extension", [FeatureCONTEXTIDREL2] >; + +def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", + "Enable ARMv8 PMUv3 Performance Monitors extension">; + +def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", + "Full FP16", [FeatureFPARMv8]>; + +def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true", + "Enable FP16 FML instructions", [FeatureFullFP16]>; + +def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true", + "Enable Statistical Profiling extension">; + +def FeaturePAN_RWV : SubtargetFeature< + "pan-rwv", "HasPAN_RWV", "true", + "Enable v8.2 PAN s1e1R and s1e1W Variants", + [FeaturePAN]>; + +// UAO PState +def FeaturePsUAO : SubtargetFeature< "uaops", "HasPsUAO", "true", + "Enable v8.2 UAO PState">; + +def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP", + "true", "Enable v8.2 data Cache Clean to Point of Persistence" >; + +def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true", + "Enable Scalable Vector Extension (SVE) instructions", [FeatureFullFP16]>; + +// This flag is currently still labeled as Experimental, but when fully +// implemented this should tell the compiler to use the zeroing pseudos to +// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive +// lanes are known to be zero. The pseudos will then be expanded using the +// MOVPRFX instruction to zero the inactive lanes. This feature should only be +// enabled if MOVPRFX instructions are known to merge with the destructive +// operations they prefix. +// +// This feature could similarly be extended to support cheap merging of _any_ +// value into the inactive lanes using the MOVPRFX instruction that uses +// merging-predication. 
+def FeatureExperimentalZeroingPseudos + : SubtargetFeature<"use-experimental-zeroing-pseudos", + "UseExperimentalZeroingPseudos", "true", + "Hint to the compiler that the MOVPRFX instruction is " + "merged with destructive operations", + []>; + +def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true", + "Enable Scalable Vector Extension 2 (SVE2) instructions", [FeatureSVE]>; + +def FeatureSVE2AES : SubtargetFeature<"sve2-aes", "HasSVE2AES", "true", + "Enable AES SVE2 instructions", [FeatureSVE2, FeatureAES]>; + +def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true", + "Enable SM4 SVE2 instructions", [FeatureSVE2, FeatureSM4]>; + +def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true", + "Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>; + +def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "true", + "Enable bit permutation SVE2 instructions", [FeatureSVE2]>; + +def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", + "Has zero-cycle register moves">; + +def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true", + "Has zero-cycle zeroing instructions for generic registers">; + +def FeatureZCZeroingFP : SubtargetFeature<"zcz-fp", "HasZeroCycleZeroingFP", "true", + "Has zero-cycle zeroing instructions for FP registers">; + +def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", + "Has zero-cycle zeroing instructions", + [FeatureZCZeroingGP, FeatureZCZeroingFP]>; + +/// ... but the floating-point version doesn't quite work in rare cases on older +/// CPUs. +def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround", + "HasZeroCycleZeroingFPWorkaround", "true", + "The zero-cycle floating-point zeroing instruction has a bug">; + +def FeatureStrictAlign : SubtargetFeature<"strict-align", + "StrictAlign", "true", + "Disallow all unaligned memory " + "access">; + +foreach i = {1-7,9-15,18,20-28,30} in + def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true", + "Reserve X"#i#", making it unavailable " + "as a GPR">; + +foreach i = {8-15,18} in + def FeatureCallSavedX#i : SubtargetFeature<"call-saved-x"#i, + "CustomCallSavedXRegs["#i#"]", "true", "Make X"#i#" callee saved.">; + +def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true", + "Use alias analysis during codegen">; + +def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps", + "true", + "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">; + +def FeaturePredictableSelectIsExpensive : SubtargetFeature< + "predictable-select-expensive", "PredictableSelectIsExpensive", "true", + "Prefer likely predicted branches over selects">; + +def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move", + "CustomAsCheapAsMove", "true", + "Use custom handling of cheap instructions">; + +def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move", + "ExynosAsCheapAsMove", "true", + "Use Exynos specific handling of cheap instructions", + [FeatureCustomCheapAsMoveHandling]>; + +def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", + "UsePostRAScheduler", "true", "Schedule again after register allocation">; + +def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store", + "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">; + +def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128", + "Paired128IsSlow", "true", "Paired 
128 bit loads and stores are slow">; + +def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "STRQroIsSlow", + "true", "STR of Q register with register offset is slow">; + +def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature< + "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern", + "true", "Use alternative pattern for sextload convert to f32">; + +def FeatureArithmeticBccFusion : SubtargetFeature< + "arith-bcc-fusion", "HasArithmeticBccFusion", "true", + "CPU fuses arithmetic+bcc operations">; + +def FeatureArithmeticCbzFusion : SubtargetFeature< + "arith-cbz-fusion", "HasArithmeticCbzFusion", "true", + "CPU fuses arithmetic + cbz/cbnz operations">; + +def FeatureCmpBccFusion : SubtargetFeature< + "cmp-bcc-fusion", "HasCmpBccFusion", "true", + "CPU fuses cmp+bcc operations">; + +def FeatureFuseAddress : SubtargetFeature< + "fuse-address", "HasFuseAddress", "true", + "CPU fuses address generation and memory operations">; + +def FeatureFuseAES : SubtargetFeature< + "fuse-aes", "HasFuseAES", "true", + "CPU fuses AES crypto operations">; + +def FeatureFuseArithmeticLogic : SubtargetFeature< + "fuse-arith-logic", "HasFuseArithmeticLogic", "true", + "CPU fuses arithmetic and logic operations">; + +def FeatureFuseCCSelect : SubtargetFeature< + "fuse-csel", "HasFuseCCSelect", "true", + "CPU fuses conditional select operations">; + +def FeatureFuseCryptoEOR : SubtargetFeature< + "fuse-crypto-eor", "HasFuseCryptoEOR", "true", + "CPU fuses AES/PMULL and EOR operations">; + +def FeatureFuseLiterals : SubtargetFeature< + "fuse-literals", "HasFuseLiterals", "true", + "CPU fuses literal generation operations">; + +def FeatureDisableLatencySchedHeuristic : SubtargetFeature< + "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", + "Disable latency scheduling heuristic">; + +def FeatureForce32BitJumpTables + : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true", + "Force jump table entries to be 32-bits wide except at MinSize">; + +def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true", + "Enable support for RCPC extension">; + +def FeatureUseRSqrt : SubtargetFeature< + "use-reciprocal-square-root", "UseRSqrt", "true", + "Use the reciprocal square root approximation">; + +def FeatureDotProd : SubtargetFeature< + "dotprod", "HasDotProd", "true", + "Enable dot product support">; + +def FeaturePAuth : SubtargetFeature< + "pauth", "HasPAuth", "true", + "Enable v8.3-A Pointer Authentication extension">; + +def FeatureJS : SubtargetFeature< + "jsconv", "HasJS", "true", + "Enable v8.3-A JavaScript FP conversion instructions", + [FeatureFPARMv8]>; + +def FeatureCCIDX : SubtargetFeature< + "ccidx", "HasCCIDX", "true", + "Enable v8.3-A Extend of the CCSIDR number of sets">; + +def FeatureComplxNum : SubtargetFeature< + "complxnum", "HasComplxNum", "true", + "Enable v8.3-A Floating-point complex number support", + [FeatureNEON]>; + +def FeatureNV : SubtargetFeature< + "nv", "HasNV", "true", + "Enable v8.4-A Nested Virtualization Enchancement">; + +def FeatureMPAM : SubtargetFeature< + "mpam", "HasMPAM", "true", + "Enable v8.4-A Memory system Partitioning and Monitoring extension">; + +def FeatureDIT : SubtargetFeature< + "dit", "HasDIT", "true", + "Enable v8.4-A Data Independent Timing instructions">; + +def FeatureTRACEV8_4 : SubtargetFeature< + "tracev8.4", "HasTRACEV8_4", "true", + "Enable v8.4-A Trace extension">; + +def FeatureAM : SubtargetFeature< + "am", "HasAM", "true", + "Enable v8.4-A Activity Monitors extension">; + 
+def FeatureAMVS : SubtargetFeature< + "amvs", "HasAMVS", "true", + "Enable v8.6-A Activity Monitors Virtualization support", + [FeatureAM]>; + +def FeatureSEL2 : SubtargetFeature< + "sel2", "HasSEL2", "true", + "Enable v8.4-A Secure Exception Level 2 extension">; + +def FeaturePMU : SubtargetFeature< + "pmu", "HasPMU", "true", + "Enable v8.4-A PMU extension">; + +def FeatureTLB_RMI : SubtargetFeature< + "tlb-rmi", "HasTLB_RMI", "true", + "Enable v8.4-A TLB Range and Maintenance Instructions">; + +def FeatureFlagM : SubtargetFeature< + "flagm", "HasFlagM", "true", + "Enable v8.4-A Flag Manipulation Instructions">; + +// 8.4 RCPC enchancements: LDAPR & STLR instructions with Immediate Offset +def FeatureRCPC_IMMO : SubtargetFeature<"rcpc-immo", "HasRCPC_IMMO", "true", + "Enable v8.4-A RCPC instructions with Immediate Offsets", + [FeatureRCPC]>; + +def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", + "NegativeImmediates", "false", + "Convert immediates and instructions " + "to their negated or complemented " + "equivalent when the immediate does " + "not fit in the encoding.">; + +def FeatureLSLFast : SubtargetFeature< + "lsl-fast", "HasLSLFast", "true", + "CPU has a fastpath logical shift of up to 3 places">; + +def FeatureAggressiveFMA : + SubtargetFeature<"aggressive-fma", + "HasAggressiveFMA", + "true", + "Enable Aggressive FMA for floating-point.">; + +def FeatureAltFPCmp : SubtargetFeature<"altnzcv", "HasAlternativeNZCV", "true", + "Enable alternative NZCV format for floating point comparisons">; + +def FeatureFRInt3264 : SubtargetFeature<"fptoint", "HasFRInt3264", "true", + "Enable FRInt[32|64][Z|X] instructions that round a floating-point number to " + "an integer (in FP format) forcing it to fit into a 32- or 64-bit int" >; + +def FeatureSpecRestrict : SubtargetFeature<"specrestrict", "HasSpecRestrict", + "true", "Enable architectural speculation restriction" >; + +def FeatureSB : SubtargetFeature<"sb", "HasSB", + "true", "Enable v8.5 Speculation Barrier" >; + +def FeatureSSBS : SubtargetFeature<"ssbs", "HasSSBS", + "true", "Enable Speculative Store Bypass Safe bit" >; + +def FeaturePredRes : SubtargetFeature<"predres", "HasPredRes", "true", + "Enable v8.5a execution and data prediction invalidation instructions" >; + +def FeatureCacheDeepPersist : SubtargetFeature<"ccdp", "HasCCDP", + "true", "Enable v8.5 Cache Clean to Point of Deep Persistence" >; + +def FeatureBranchTargetId : SubtargetFeature<"bti", "HasBTI", + "true", "Enable Branch Target Identification" >; + +def FeatureRandGen : SubtargetFeature<"rand", "HasRandGen", + "true", "Enable Random Number generation instructions" >; + +def FeatureMTE : SubtargetFeature<"mte", "HasMTE", + "true", "Enable Memory Tagging Extension" >; + +def FeatureTRBE : SubtargetFeature<"trbe", "HasTRBE", + "true", "Enable Trace Buffer Extension">; + +def FeatureETE : SubtargetFeature<"ete", "HasETE", + "true", "Enable Embedded Trace Extension", + [FeatureTRBE]>; + +def FeatureTME : SubtargetFeature<"tme", "HasTME", + "true", "Enable Transactional Memory Extension" >; + +def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals", + "AllowTaggedGlobals", + "true", "Use an instruction sequence for taking the address of a global " + "that allows a memory tag in the upper address bits">; + +def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", + "true", "Enable BFloat16 Extension" >; + +def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8", + "true", "Enable Matrix Multiply Int8 Extension">; + +def FeatureMatMulFP32 : 
SubtargetFeature<"f32mm", "HasMatMulFP32", + "true", "Enable Matrix Multiply FP32 Extension", [FeatureSVE]>; + +def FeatureMatMulFP64 : SubtargetFeature<"f64mm", "HasMatMulFP64", + "true", "Enable Matrix Multiply FP64 Extension", [FeatureSVE]>; + +def FeatureXS : SubtargetFeature<"xs", "HasXS", + "true", "Enable Armv8.7-A limited-TLB-maintenance instruction">; + +def FeatureWFxT : SubtargetFeature<"wfxt", "HasWFxT", + "true", "Enable Armv8.7-A WFET and WFIT instruction">; + +def FeatureHCX : SubtargetFeature< + "hcx", "HasHCX", "true", "Enable Armv8.7-A HCRX_EL2 system register">; + +def FeatureLS64 : SubtargetFeature<"ls64", "HasLS64", + "true", "Enable Armv8.7-A LD64B/ST64B Accelerator Extension">; + +def FeatureBRBE : SubtargetFeature<"brbe", "HasBRBE", + "true", "Enable Branch Record Buffer Extension">; + +def FeatureSPE_EEF : SubtargetFeature<"spe-eef", "HasSPE_EEF", + "true", "Enable extra register in the Statistical Profiling Extension">; + +def FeatureFineGrainedTraps : SubtargetFeature<"fgt", "HasFineGrainedTraps", + "true", "Enable fine grained virtualization traps extension">; + +def FeatureEnhancedCounterVirtualization : + SubtargetFeature<"ecv", "HasEnhancedCounterVirtualization", + "true", "Enable enhanced counter virtualization extension">; + +//===----------------------------------------------------------------------===// +// Architectures. +// + +def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", + "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM, + FeaturePAN, FeatureLOR, FeatureVH]>; + +def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", + "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO, + FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>; + +def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", + "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePAuth, + FeatureJS, FeatureCCIDX, FeatureComplxNum]>; + +def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", + "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd, + FeatureNV, FeatureMPAM, FeatureDIT, + FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeaturePMU, FeatureTLB_RMI, + FeatureFlagM, FeatureRCPC_IMMO]>; + +def HasV8_5aOps : SubtargetFeature< + "v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions", + [HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict, + FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist, + FeatureBranchTargetId]>; + +def HasV8_6aOps : SubtargetFeature< + "v8.6a", "HasV8_6aOps", "true", "Support ARM v8.6a instructions", + [HasV8_5aOps, FeatureAMVS, FeatureBF16, FeatureFineGrainedTraps, + FeatureEnhancedCounterVirtualization, FeatureMatMulInt8]>; + +def HasV8_7aOps : SubtargetFeature< + "v8.7a", "HasV8_7aOps", "true", "Support ARM v8.7a instructions", + [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX]>; + +def HasV8_0rOps : SubtargetFeature< + "v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions", + [//v8.1 + FeatureCRC, FeaturePAN, FeatureRDM, FeatureLSE, FeatureCONTEXTIDREL2, + //v8.2 + FeaturePerfMon, FeatureRAS, FeaturePsUAO, FeatureSM4, + FeatureSHA3, FeatureCCPP, FeatureFullFP16, FeaturePAN_RWV, + //v8.3 + FeatureComplxNum, FeatureCCIDX, FeatureJS, + FeaturePAuth, FeatureRCPC, + //v8.4 + FeatureDotProd, FeatureFP16FML, FeatureTRACEV8_4, + FeatureTLB_RMI, FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO, + //v8.5 + FeatureSSBS, FeaturePredRes, FeatureSB, FeatureSpecRestrict]>; + 
+//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "AArch64RegisterInfo.td" +include "AArch64RegisterBanks.td" +include "AArch64ArkGcCallingConvention.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "AArch64Schedule.td" +include "AArch64InstrInfo.td" +include "AArch64SchedPredicates.td" +include "AArch64SchedPredExynos.td" +include "AArch64Combine.td" + +def AArch64InstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// Named operands for MRS/MSR/TLBI/... +//===----------------------------------------------------------------------===// + +include "AArch64SystemOperands.td" + +//===----------------------------------------------------------------------===// +// Access to privileged registers +//===----------------------------------------------------------------------===// + +foreach i = 1-3 in +def FeatureUseEL#i#ForTP : SubtargetFeature<"tpidr-el"#i, "UseEL"#i#"ForTP", + "true", "Permit use of TPIDR_EL"#i#" for the TLS base">; + +//===----------------------------------------------------------------------===// +// Control codegen mitigation against Straight Line Speculation vulnerability. +//===----------------------------------------------------------------------===// + +def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr", + "HardenSlsRetBr", "true", + "Harden against straight line speculation across RET and BR instructions">; +def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr", + "HardenSlsBlr", "true", + "Harden against straight line speculation across BLR instructions">; + +//===----------------------------------------------------------------------===// +// AArch64 Processors supported. 
+// + +//===----------------------------------------------------------------------===// +// Unsupported features to disable for scheduling models +//===----------------------------------------------------------------------===// + +class AArch64Unsupported { list F; } + +def SVEUnsupported : AArch64Unsupported { + let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, + HasSVE2BitPerm]; +} + +def PAUnsupported : AArch64Unsupported { + let F = [HasPAuth]; +} + +include "AArch64SchedA53.td" +include "AArch64SchedA55.td" +include "AArch64SchedA57.td" +include "AArch64SchedCyclone.td" +include "AArch64SchedFalkor.td" +include "AArch64SchedKryo.td" +include "AArch64SchedExynosM3.td" +include "AArch64SchedExynosM4.td" +include "AArch64SchedExynosM5.td" +include "AArch64SchedThunderX.td" +include "AArch64SchedThunderX2T99.td" +include "AArch64SchedA64FX.td" +include "AArch64SchedThunderX3T110.td" +include "AArch64SchedTSV110.td" + +def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", + "Cortex-A35 ARM processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureNEON, + FeaturePerfMon + ]>; + +def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", + "Cortex-A53 ARM processors", [ + FeatureBalanceFPOps, + FeatureCRC, + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureUseAA + ]>; + +def ProcA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", + "Cortex-A55 ARM processors", [ + HasV8_2aOps, + FeatureCrypto, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeatureFullFP16, + FeatureDotProd, + FeatureRCPC, + FeaturePerfMon + ]>; + +def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", + "Cortex-A57 ARM processors", [ + FeatureBalanceFPOps, + FeatureCRC, + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureFuseAES, + FeatureFuseLiterals, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive + ]>; + +def ProcA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65", + "Cortex-A65 ARM processors", [ + HasV8_2aOps, + FeatureCrypto, + FeatureDotProd, + FeatureFPARMv8, + FeatureFullFP16, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseLiterals, + FeatureNEON, + FeatureRAS, + FeatureRCPC, + FeatureSSBS, + ]>; + +def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", + "Cortex-A72 ARM processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureFuseAES, + FeatureFuseLiterals, + FeatureNEON, + FeaturePerfMon + ]>; + +def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", + "Cortex-A73 ARM processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon + ]>; + +def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", + "Cortex-A75 ARM processors", [ + HasV8_2aOps, + FeatureCrypto, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeatureFullFP16, + FeatureDotProd, + FeatureRCPC, + FeaturePerfMon + ]>; + +def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", + "Cortex-A76 ARM processors", [ + HasV8_2aOps, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeatureRCPC, + FeatureCrypto, + FeatureFullFP16, + FeatureDotProd, + FeatureSSBS + ]>; + +def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", + "Cortex-A77 ARM processors", [ + HasV8_2aOps, + FeatureCmpBccFusion, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, FeatureRCPC, + 
FeatureCrypto, + FeatureFullFP16, + FeatureDotProd + ]>; + +def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily", + "CortexA78", + "Cortex-A78 ARM processors", [ + HasV8_2aOps, + FeatureCmpBccFusion, + FeatureCrypto, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeatureRCPC, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureSPE, + FeatureFullFP16, + FeatureSSBS, + FeatureDotProd]>; + +def ProcA78C : SubtargetFeature<"cortex-a78c", "ARMProcFamily", + "CortexA78C", + "Cortex-A78C ARM processors", [ + HasV8_2aOps, + FeatureCmpBccFusion, + FeatureCrypto, + FeatureDotProd, + FeatureFlagM, + FeatureFP16FML, + FeatureFPARMv8, + FeatureFullFP16, + FeatureFuseAES, + FeatureNEON, + FeaturePAuth, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureRCPC, + FeatureSPE, + FeatureSSBS]>; + +def ProcR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily", + "CortexR82", + "Cortex-R82 ARM Processors", [ + FeaturePostRAScheduler, + // TODO: crypto and FuseAES + // All other features are implied by v8_0r ops: + HasV8_0rOps, + ]>; + +def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", + "Cortex-X1 ARM processors", [ + HasV8_2aOps, + FeatureCmpBccFusion, + FeatureCrypto, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeatureRCPC, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureSPE, + FeatureFullFP16, + FeatureDotProd]>; + +def ProcA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX", + "Fujitsu A64FX processors", [ + HasV8_2aOps, + FeatureFPARMv8, + FeatureNEON, + FeatureSHA2, + FeaturePerfMon, + FeatureFullFP16, + FeatureSVE, + FeaturePostRAScheduler, + FeatureComplxNum, + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeaturePredictableSelectIsExpensive + ]>; + +def ProcCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel", + "Nvidia Carmel processors", [ + HasV8_2aOps, + FeatureNEON, + FeatureCrypto, + FeatureFullFP16 + ]>; + +// Note that cyclone does not fuse AES instructions, but newer apple chips do +// perform the fusion and cyclone is used by default when targetting apple OSes. 
+def ProcAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7", + "Apple A7 (the CPU formerly known as Cyclone)", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureCrypto, + FeatureDisableLatencySchedHeuristic, + FeatureFPARMv8, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureNEON, + FeaturePerfMon, + FeatureZCRegMove, + FeatureZCZeroing, + FeatureZCZeroingFPWorkaround + ]>; + +def ProcAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10", + "Apple A10", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureCrypto, + FeatureDisableLatencySchedHeuristic, + FeatureFPARMv8, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureNEON, + FeaturePerfMon, + FeatureZCRegMove, + FeatureZCZeroing, + FeatureCRC, + FeatureRDM, + FeaturePAN, + FeatureLOR, + FeatureVH, + ]>; + +def ProcAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11", + "Apple A11", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureCrypto, + FeatureDisableLatencySchedHeuristic, + FeatureFPARMv8, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureNEON, + FeaturePerfMon, + FeatureZCRegMove, + FeatureZCZeroing, + FeatureFullFP16, + HasV8_2aOps + ]>; + +def ProcAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12", + "Apple A12", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureCrypto, + FeatureDisableLatencySchedHeuristic, + FeatureFPARMv8, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureNEON, + FeaturePerfMon, + FeatureZCRegMove, + FeatureZCZeroing, + FeatureFullFP16, + HasV8_3aOps + ]>; + +def ProcAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13", + "Apple A13", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureCrypto, + FeatureDisableLatencySchedHeuristic, + FeatureFPARMv8, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureNEON, + FeaturePerfMon, + FeatureZCRegMove, + FeatureZCZeroing, + FeatureFullFP16, + FeatureFP16FML, + FeatureSHA3, + HasV8_4aOps + ]>; + +def ProcAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14", + "Apple A14", [ + FeatureAggressiveFMA, + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureAltFPCmp, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureCrypto, + FeatureDisableLatencySchedHeuristic, + FeatureFPARMv8, + FeatureFRInt3264, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseArithmeticLogic, + FeatureFuseCCSelect, + FeatureFuseCryptoEOR, + FeatureFuseLiterals, + FeatureNEON, + FeaturePerfMon, + FeatureSpecRestrict, + FeatureSSBS, + FeatureSB, + FeaturePredRes, + FeatureCacheDeepPersist, + FeatureZCRegMove, + FeatureZCZeroing, + FeatureFullFP16, + FeatureFP16FML, + FeatureSHA3, + HasV8_4aOps + ]>; + +def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", + "Samsung Exynos-M3 processors", + [FeatureCRC, + FeatureCrypto, + FeatureExynosCheapAsMoveHandling, + FeatureForce32BitJumpTables, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseCCSelect, + FeatureFuseLiterals, + FeatureLSLFast, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroingFP]>; + +def ProcExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3", + "Samsung Exynos-M4 processors", + [HasV8_2aOps, + FeatureArithmeticBccFusion, + 
FeatureArithmeticCbzFusion, + FeatureCrypto, + FeatureDotProd, + FeatureExynosCheapAsMoveHandling, + FeatureForce32BitJumpTables, + FeatureFullFP16, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseArithmeticLogic, + FeatureFuseCCSelect, + FeatureFuseLiterals, + FeatureLSLFast, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureZCZeroing]>; + +def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", + "Qualcomm Kryo processors", [ + FeatureCRC, + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing, + FeatureLSLFast + ]>; + +def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", + "Qualcomm Falkor processors", [ + FeatureCRC, + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureRDM, + FeatureZCZeroing, + FeatureLSLFast, + FeatureSlowSTRQro + ]>; + +def ProcNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", + "NeoverseE1", + "Neoverse E1 ARM processors", [ + HasV8_2aOps, + FeatureCrypto, + FeatureDotProd, + FeatureFPARMv8, + FeatureFullFP16, + FeatureNEON, + FeatureRCPC, + FeatureSSBS, + ]>; + +def ProcNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", + "NeoverseN1", + "Neoverse N1 ARM processors", [ + HasV8_2aOps, + FeatureCrypto, + FeatureDotProd, + FeatureFPARMv8, + FeatureFullFP16, + FeatureNEON, + FeatureRCPC, + FeatureSPE, + FeatureSSBS, + ]>; + +def ProcNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", + "NeoverseN2", + "Neoverse N2 ARM processors", [ + HasV8_5aOps, + FeatureBF16, + FeatureETE, + FeatureMatMulInt8, + FeatureMTE, + FeatureSVE2, + FeatureSVE2BitPerm, + FeatureTRBE]>; + +def ProcNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", + "NeoverseV1", + "Neoverse V1 ARM processors", [ + HasV8_4aOps, + FeatureBF16, + FeatureCacheDeepPersist, + FeatureCrypto, + FeatureFPARMv8, + FeatureFP16FML, + FeatureFullFP16, + FeatureFuseAES, + FeatureMatMulInt8, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureRandGen, + FeatureSPE, + FeatureSSBS, + FeatureSVE]>; + +def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", + "Qualcomm Saphira processors", [ + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureNEON, + FeatureSPE, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing, + FeatureLSLFast, + HasV8_4aOps]>; + +def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", + "ThunderX2T99", + "Cavium ThunderX2 processors", [ + FeatureAggressiveFMA, + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureArithmeticBccFusion, + FeatureNEON, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureLSE, + HasV8_1aOps]>; + +def ProcThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily", + "ThunderX3T110", + "Marvell ThunderX3 processors", [ + FeatureAggressiveFMA, + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureArithmeticBccFusion, + FeatureNEON, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureLSE, + FeaturePAuth, + FeatureUseAA, + FeatureBalanceFPOps, + FeaturePerfMon, + FeatureStrictAlign, + HasV8_3aOps]>; + +def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX", + "Cavium ThunderX processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + 
FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureNEON]>; + +def ProcThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily", + "ThunderXT88", + "Cavium ThunderX processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureNEON]>; + +def ProcThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily", + "ThunderXT81", + "Cavium ThunderX processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureNEON]>; + +def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily", + "ThunderXT83", + "Cavium ThunderX processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureNEON]>; + +def ProcTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110", + "HiSilicon TS-V110 processors", [ + HasV8_2aOps, + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureSPE, + FeatureFullFP16, + FeatureFP16FML, + FeatureDotProd]>; + +def : ProcessorModel<"generic", NoSchedModel, [ + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, +// ETE and TRBE are future architecture extensions. We temporarily enable them +// by default for users targeting generic AArch64, until it is decided in which +// armv8.x-a architecture revision they will end up. The extensions do not +// affect code generated by the compiler and can be used only by explicitly +// mentioning the new system register names in assembly. 
+ FeatureETE + ]>; + +def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>; +def : ProcessorModel<"cortex-a34", CortexA53Model, [ProcA35]>; +def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; +def : ProcessorModel<"cortex-a55", CortexA55Model, [ProcA55]>; +def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>; +def : ProcessorModel<"cortex-a65", CortexA53Model, [ProcA65]>; +def : ProcessorModel<"cortex-a65ae", CortexA53Model, [ProcA65]>; +def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>; +def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>; +def : ProcessorModel<"cortex-a75", CortexA57Model, [ProcA75]>; +def : ProcessorModel<"cortex-a76", CortexA57Model, [ProcA76]>; +def : ProcessorModel<"cortex-a76ae", CortexA57Model, [ProcA76]>; +def : ProcessorModel<"cortex-a77", CortexA57Model, [ProcA77]>; +def : ProcessorModel<"cortex-a78", CortexA57Model, [ProcA78]>; +def : ProcessorModel<"cortex-a78c", CortexA57Model, [ProcA78C]>; +def : ProcessorModel<"cortex-r82", CortexA55Model, [ProcR82]>; +def : ProcessorModel<"cortex-x1", CortexA57Model, [ProcX1]>; +def : ProcessorModel<"neoverse-e1", CortexA53Model, [ProcNeoverseE1]>; +def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>; +def : ProcessorModel<"neoverse-n2", CortexA57Model, [ProcNeoverseN2]>; +def : ProcessorModel<"neoverse-v1", CortexA57Model, [ProcNeoverseV1]>; +def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>; +def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>; +def : ProcessorModel<"exynos-m5", ExynosM5Model, [ProcExynosM4]>; +def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>; +def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>; +def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>; +// Cavium ThunderX/ThunderX T8X Processors +def : ProcessorModel<"thunderx", ThunderXT8XModel, [ProcThunderX]>; +def : ProcessorModel<"thunderxt88", ThunderXT8XModel, [ProcThunderXT88]>; +def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>; +def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>; +// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan. +def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>; +// Marvell ThunderX3T110 Processors. +def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, [ProcThunderX3T110]>; +def : ProcessorModel<"tsv110", TSV110Model, [ProcTSV110]>; + +// Support cyclone as an alias for apple-a7 so we can still LTO old bitcode. +def : ProcessorModel<"cyclone", CycloneModel, [ProcAppleA7]>; + +// iPhone and iPad CPUs +def : ProcessorModel<"apple-a7", CycloneModel, [ProcAppleA7]>; +def : ProcessorModel<"apple-a8", CycloneModel, [ProcAppleA7]>; +def : ProcessorModel<"apple-a9", CycloneModel, [ProcAppleA7]>; +def : ProcessorModel<"apple-a10", CycloneModel, [ProcAppleA10]>; +def : ProcessorModel<"apple-a11", CycloneModel, [ProcAppleA11]>; +def : ProcessorModel<"apple-a12", CycloneModel, [ProcAppleA12]>; +def : ProcessorModel<"apple-a13", CycloneModel, [ProcAppleA13]>; +def : ProcessorModel<"apple-a14", CycloneModel, [ProcAppleA14]>; + +// watch CPUs. +def : ProcessorModel<"apple-s4", CycloneModel, [ProcAppleA12]>; +def : ProcessorModel<"apple-s5", CycloneModel, [ProcAppleA12]>; + +// Alias for the latest Apple processor model supported by LLVM. 
+def : ProcessorModel<"apple-latest", CycloneModel, [ProcAppleA13]>; + +// Fujitsu A64FX +def : ProcessorModel<"a64fx", A64FXModel, [ProcA64FX]>; + +// Nvidia Carmel +def : ProcessorModel<"carmel", NoSchedModel, [ProcCarmel]>; + +//===----------------------------------------------------------------------===// +// Assembly parser +//===----------------------------------------------------------------------===// + +def GenericAsmParserVariant : AsmParserVariant { + int Variant = 0; + string Name = "generic"; + string BreakCharacters = "."; + string TokenizingCharacters = "[]*!/"; +} + +def AppleAsmParserVariant : AsmParserVariant { + int Variant = 1; + string Name = "apple-neon"; + string BreakCharacters = "."; + string TokenizingCharacters = "[]*!/"; +} + +//===----------------------------------------------------------------------===// +// Assembly printer +//===----------------------------------------------------------------------===// +// AArch64 Uses the MC printer for asm output, so make sure the TableGen +// AsmWriter bits get associated with the correct class. +def GenericAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; + bit isMCAsmWriter = 1; +} + +def AppleAsmWriter : AsmWriter { + let AsmWriterClassName = "AppleInstPrinter"; + int PassSubtarget = 1; + int Variant = 1; + int isMCAsmWriter = 1; +} + +//===----------------------------------------------------------------------===// +// Target Declaration +//===----------------------------------------------------------------------===// + +def AArch64 : Target { + let InstructionSet = AArch64InstrInfo; + let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant]; + let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter]; + let AllowRegisterRenaming = 1; +} + +//===----------------------------------------------------------------------===// +// Pfm Counters +//===----------------------------------------------------------------------===// + +include "AArch64PfmCounters.td" diff --git a/llvm/lib/Target/AArch64/AArch64ArkGcCallingConvention.td b/llvm/lib/Target/AArch64/AArch64ArkGcCallingConvention.td new file mode 100644 index 0000000000000000000000000000000000000000..a55537c16a4af2d919afa980635c594733869cca --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64ArkGcCallingConvention.td @@ -0,0 +1,512 @@ +//=- AArch64ArkGcCallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for AArch64 architecture. +// +//===----------------------------------------------------------------------===// + +/// CCIfBigEndian - Match only if we're in big endian mode. 
+class CCIfBigEndian : + CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>; + +class CCIfILP32 : + CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>; + + +//===----------------------------------------------------------------------===// +// ARM AAPCS64 Calling Convention +//===----------------------------------------------------------------------===// + +let Entry = 1 in +def CC_AArch64_AAPCS : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32], CCBitConvertToType>, + + // Big endian vectors must be passed as if they were 1-element vectors so that + // their lanes are in a consistent order. + CCIfBigEndian>>, + CCIfBigEndian>>, + + // In AAPCS, an SRet is passed in X8, not X0 like a normal pointer parameter. + // However, on windows, in some circumstances, the SRet is passed in X0 or X1 + // instead. The presence of the inreg attribute indicates that SRet is + // passed in the alternative register (X0 or X1), not X8: + // - X0 for non-instance methods. + // - X1 for instance methods. + + // The "sret" attribute identifies indirect returns. + // The "inreg" attribute identifies non-aggregate types. + // The position of the "sret" attribute identifies instance/non-instance + // methods. + // "sret" on argument 0 means non-instance methods. + // "sret" on argument 1 means instance methods. + + CCIfInReg>>>>, + + CCIfSRet>>, + + // Put ByVal arguments directly on the stack. Minimum size and alignment of a + // slot is 64-bit. + CCIfByVal>, + + // The 'nest' parameter, if any, is passed in X18. + // Darwin uses X18 as the platform register and hence 'nest' isn't currently + // supported there. + CCIfNest>, + + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf>>, + + // A SwiftError is passed in X21. + CCIfSwiftError>>, + + CCIfConsecutiveRegs>, + + CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, + nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64], + CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>, + CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, + nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64], + CCPassIndirect>, + + CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCAssignToReg<[P0, P1, P2, P3]>>, + CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCPassIndirect>, + + // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, + // up to eight each of GPR and FPR. + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], + [X0, X1, X2, X3, X4, X5, X6, X7]>>, + // i128 is split to two i64s, we can't fit half to register X7. + CCIfType<[i64], CCIfSplit>>, + + // i128 is split to two i64s, and its stack alignment is 16 bytes. 
+ CCIfType<[i64], CCIfSplit>>, + + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], + [W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[bf16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], + CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + // If more than will fit in registers, pass them on the stack instead. + CCIfType<[i1, i8, i16, f16, bf16], CCAssignToStack<8, 8>>, + CCIfType<[i32, f32], CCAssignToStack<8, 8>>, + CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16, v4bf16], + CCAssignToStack<8, 8>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToStack<16, 16>> +]>; + +let Entry = 1 in +def RetCC_AArch64_AAPCS : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32], CCBitConvertToType>, + + CCIfConsecutiveRegs>, + CCIfSwiftError>>, + + // Big endian vectors must be passed as if they were 1-element vectors so that + // their lanes are in a consistent order. + CCIfBigEndian>>, + CCIfBigEndian>>, + + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], + [X0, X1, X2, X3, X4, X5, X6, X7]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], + [W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[bf16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], + CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, + nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64], + CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>, + + CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCAssignToReg<[P0, P1, P2, P3]>> +]>; + +// Vararg functions on windows pass floats in integer registers +let Entry = 1 in +def CC_AArch64_Win64_VarArg : CallingConv<[ + CCIfType<[f16, bf16, f32], CCPromoteToType>, + CCIfType<[f64], CCBitConvertToType>, + CCDelegateTo +]>; + +// Windows Control Flow Guard checks take a single argument (the target function +// address) and have no return value. 
+let Entry = 1 in +def CC_AArch64_Win64_CFGuard_Check : CallingConv<[ + CCIfType<[i64], CCAssignToReg<[X15]>> +]>; + + +// Darwin uses a calling convention which differs in only two ways +// from the standard one at this level: +// + i128s (i.e. split i64s) don't need even registers. +// + Stack slots are sized as needed rather than being at least 64-bit. +let Entry = 1 in +def CC_AArch64_DarwinPCS : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, + + // An SRet is passed in X8, not X0 like a normal pointer parameter. + CCIfSRet>>, + + // Put ByVal arguments directly on the stack. Minimum size and alignment of a + // slot is 64-bit. + CCIfByVal>, + + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf>>, + + // A SwiftError is passed in X21. + CCIfSwiftError>>, + + CCIfConsecutiveRegs>, + + // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, + // up to eight each of GPR and FPR. + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], + [X0, X1, X2, X3, X4, X5, X6, X7]>>, + // i128 is split to two i64s, we can't fit half to register X7. + CCIfType<[i64], + CCIfSplit>>, + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfType<[i64], CCIfSplit>>, + + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], + [W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[bf16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], + CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + // If more than will fit in registers, pass them on the stack instead. + CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>, + CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16 || ValVT == MVT::bf16", + CCAssignToStack<2, 2>>, + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + + // Re-demote pointers to 32-bits so we don't end up storing 64-bit + // values and clobbering neighbouring stack locations. Not very pretty. + CCIfPtr>>, + CCIfPtr>>, + + CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16, v4bf16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToStack<16, 16>> +]>; + +let Entry = 1 in +def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, + + CCIfConsecutiveRegs>, + + // Handle all scalar types as either i64 or f64. + CCIfType<[i8, i16, i32], CCPromoteToType>, + CCIfType<[f16, bf16, f32], CCPromoteToType>, + + // Everything is on the stack. + // i128 is split to two i64s, and its stack alignment is 16 bytes. 
+ CCIfType<[i64], CCIfSplit>>, + CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToStack<16, 16>> +]>; + +// In the ILP32 world, the minimum stack slot size is 4 bytes. Otherwise the +// same as the normal Darwin VarArgs handling. +let Entry = 1 in +def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[ + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, + + // Handle all scalar types as either i32 or f32. + CCIfType<[i8, i16], CCPromoteToType>, + CCIfType<[f16, bf16], CCPromoteToType>, + + // Everything is on the stack. + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfPtr>>, + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64], CCIfSplit>>, + CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToStack<16, 16>> +]>; + + +// The WebKit_JS calling convention only passes the first argument (the callee) +// in register and the remaining arguments on stack. We allow 32bit stack slots, +// so that WebKit can write partial values in the stack and define the other +// 32bit quantity as undef. +let Entry = 1 in +def CC_AArch64_WebKit_JS : CallingConv<[ + // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>, + + // Pass the remaining arguments on the stack instead. + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64, f64], CCAssignToStack<8, 8>> +]>; + +let Entry = 1 in +def RetCC_AArch64_WebKit_JS : CallingConv<[ + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], + [X0, X1, X2, X3, X4, X5, X6, X7]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], + [W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> +]>; + +//===----------------------------------------------------------------------===// +// ARM64 Calling Convention for GHC +//===----------------------------------------------------------------------===// + +// This calling convention is specific to the Glasgow Haskell Compiler. +// The only documentation is the GHC source code, specifically the C header +// file: +// +// https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h +// +// which defines the registers for the Spineless Tagless G-Machine (STG) that +// GHC uses to implement lazy evaluation. The generic STG machine has a set of +// registers which are mapped to appropriate set of architecture specific +// registers for each CPU architecture. +// +// The STG Machine is documented here: +// +// https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode +// +// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI +// register mapping". + +let Entry = 1 in +def CC_AArch64_GHC : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + + // Handle all vector types as either f64 or v2f64. 
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType>, + + CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, + CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>, + CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>, + + // Promote i8/i16/i32 arguments to i64. + CCIfType<[i8, i16, i32], CCPromoteToType>, + + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim + CCIfType<[i64], CCAssignToReg<[X19, FP, X20, X21, X22, X23, X24, X25, X26, X27, X28]>> +]>; + +// The order of the callee-saves in this file is important, because the +// FrameLowering code will use this order to determine the layout the +// callee-save area in the stack frame. As can be observed below, Darwin +// requires the frame-record (LR, FP) to be at the top the callee-save area, +// whereas for other platforms they are at the bottom. + +// FIXME: LR is only callee-saved in the sense that *we* preserve it and are +// presumably a callee to someone. External functions may not do so, but this +// is currently safe since BL has LR as an implicit-def and what happens after a +// tail call doesn't matter. +// +// It would be better to model its preservation semantics properly (create a +// vreg on entry, use it in RET & tail call generation; make that vreg def if we +// end up saving LR as part of a call frame). Watch this space... +def CSR_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, + D8, D9, D10, D11, + D12, D13, D14, D15, + LR, FP)>; + +// A variant for treating X18 as callee saved, when interfacing with +// code that needs X18 to be preserved. +def CSR_AArch64_AAPCS_X18 : CalleeSavedRegs<(add X18, CSR_AArch64_AAPCS)>; + +// Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x. +// We put FP before LR, so that frame lowering logic generates (FP,LR) pairs, +// and not (LR,FP) pairs. +def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, FP, LR, + D8, D9, D10, D11, + D12, D13, D14, D15)>; + +// The Control Flow Guard check call uses a custom calling convention that also +// preserves X0-X8 and Q0-Q7. 
+def CSR_Win_AArch64_CFGuard_Check : CalleeSavedRegs<(add CSR_Win_AArch64_AAPCS, + (sequence "X%u", 0, 8), + (sequence "Q%u", 0, 7))>; + +// AArch64 PCS for vector functions (VPCS) +// must (additionally) preserve full Q8-Q23 registers +def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP, + (sequence "Q%u", 8, 23))>; + +// Functions taking SVE arguments or returning an SVE type +// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15 +def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add (sequence "Z%u", 8, 23), + (sequence "P%u", 4, 15), + X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP)>; + +// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since +// 'this' and the pointer return value are both passed in X0 in these cases, +// this can be partially modelled by treating X0 as a callee-saved register; +// only the resulting RegMask is used; the SaveList is ignored +// +// (For generic ARM 64-bit ABI code, clang will not generate constructors or +// destructors with 'this' returns, so this RegMask will not be used in that +// case) +def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>; + +def CSR_AArch64_AAPCS_SwiftError + : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>; + +// The ELF stub used for TLS-descriptor access saves every feasible +// register. Only X0 and LR are clobbered. +def CSR_AArch64_TLS_ELF + : CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP, + (sequence "Q%u", 0, 31))>; + +def CSR_AArch64_AllRegs + : CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP, + (sequence "X%u", 0, 28), FP, LR, SP, + (sequence "B%u", 0, 31), (sequence "H%u", 0, 31), + (sequence "S%u", 0, 31), (sequence "D%u", 0, 31), + (sequence "Q%u", 0, 31))>; + +def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>; + +def CSR_AArch64_RT_MostRegs : CalleeSavedRegs<(add CSR_AArch64_AAPCS, + (sequence "X%u", 9, 15))>; + +def CSR_AArch64_StackProbe_Windows + : CalleeSavedRegs<(add (sequence "X%u", 0, 15), + (sequence "X%u", 18, 28), FP, SP, + (sequence "Q%u", 0, 31))>; + +// Darwin variants of AAPCS. +// Darwin puts the frame-record at the top of the callee-save area. +def CSR_Darwin_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, + X23, X24, X25, X26, X27, X28, + D8, D9, D10, D11, + D12, D13, D14, D15)>; + +def CSR_Darwin_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, + X22, X23, X24, X25, X26, X27, + X28, (sequence "Q%u", 8, 23))>; +def CSR_Darwin_AArch64_AAPCS_ThisReturn + : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, X0)>; + +def CSR_Darwin_AArch64_AAPCS_SwiftError + : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>; + +// The function used by Darwin to obtain the address of a thread-local variable +// guarantees more than a normal AAPCS function. x16 and x17 are used on the +// fast path for calculation, but other registers except X0 (argument/return) +// and LR (it is a call, after all) are preserved. +def CSR_Darwin_AArch64_TLS + : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17), + FP, + (sequence "Q%u", 0, 31))>; + +// We can only handle a register pair with adjacent registers, the register pair +// should belong to the same class as well. Since the access function on the +// fast path calls a function that follows CSR_Darwin_AArch64_TLS, +// CSR_Darwin_AArch64_CXX_TLS should be a subset of CSR_Darwin_AArch64_TLS. 
+def CSR_Darwin_AArch64_CXX_TLS + : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, + (sub (sequence "X%u", 1, 28), X15, X16, X17, X18), + (sequence "D%u", 0, 31))>; + +// CSRs that are handled by prologue, epilogue. +def CSR_Darwin_AArch64_CXX_TLS_PE + : CalleeSavedRegs<(add LR, FP)>; + +// CSRs that are handled explicitly via copies. +def CSR_Darwin_AArch64_CXX_TLS_ViaCopy + : CalleeSavedRegs<(sub CSR_Darwin_AArch64_CXX_TLS, LR, FP)>; + +def CSR_Darwin_AArch64_RT_MostRegs + : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, (sequence "X%u", 9, 15))>; + +// Variants of the standard calling conventions for shadow call stack. +// These all preserve x18 in addition to any other registers. +def CSR_AArch64_NoRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_NoRegs, X18)>; +def CSR_AArch64_AllRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_AllRegs, X18)>; +def CSR_AArch64_AAPCS_SwiftError_SCS + : CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>; +def CSR_AArch64_RT_MostRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>; +def CSR_AArch64_AAVPCS_SCS + : CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>; +def CSR_AArch64_SVE_AAPCS_SCS + : CalleeSavedRegs<(add CSR_AArch64_SVE_AAPCS, X18)>; +def CSR_AArch64_AAPCS_SCS + : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 85e3426b049e9c57e2766454661f85fda17756db..71c7b3e170a2788ffbe3ae3f1dfa1563f70006d7 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1379,6 +1379,18 @@ static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII, } } +#ifdef ARK_GC_SUPPORT +Triple::ArchType AArch64FrameLowering::GetArkSupportTarget() const +{ + return Triple::aarch64; +} + +int AArch64FrameLowering::GetFixedFpPosition() const +{ + return -1; +} +#endif + void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -1477,8 +1489,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. + #ifndef ARK_GC_SUPPORT if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; + #endif + // asm-int GHC call webkit function, we need push regs to stack. // OHOS_LOCAL begin if (HasFP && (MF.getFunction().getCallingConv() == CallingConv::ArkFast0 || @@ -1946,8 +1961,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. + #ifndef ARK_GC_SUPPORT if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; + #endif + // asm-int GHC call webkit function, we need push regs to stack. // How much of the stack used by incoming arguments this function is expected // to restore in this particular epilogue. @@ -2971,8 +2989,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, RegScavenger *RS) const { // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. + #ifndef ARK_GC_SUPPORT if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; + #endif + // asm-int GHC call webkit function, we need push regs to stack. 
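Editorial note, not part of the patch: with ARK_GC_SUPPORT defined, the three "#ifndef ARK_GC_SUPPORT" guards above compile away the early returns, so GHC-convention functions still get a prologue/epilogue and a callee-save area that the GC can walk. A minimal sketch of how other machine-level code could detect an Ark-aware frame lowering through the new GetArkSupportTarget() hook, assuming an ARK_GC_SUPPORT build of this tree; the helper name isArkGcManagedTarget is illustrative only:

#ifdef ARK_GC_SUPPORT
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"

// Returns true when the current target's frame lowering participates in Ark
// GC stack walking. In this patch only AArch64 (Triple::aarch64, above) and
// X86 (Triple::x86_64 / Triple::x86, later in the patch) override the hook.
static bool isArkGcManagedTarget(const llvm::MachineFunction &MF) {
  const llvm::TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  llvm::Triple::ArchType Arch = TFI->GetArkSupportTarget();
  return Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::x86_64;
}
#endif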
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); const AArch64RegisterInfo *RegInfo = static_cast( diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index bab56e9f8645d110be6dbc79bbaff49956b23bbe..a6d4206a7c42f2e93ddcf2b1bc7b243b48a6ceba 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -16,6 +16,9 @@ #include "AArch64StackProtectorRetLowering.h" // OHOS_LOCAL #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Support/TypeSize.h" +#ifdef ARK_GC_SUPPORT +#include "llvm/ADT/Triple.h" +#endif namespace llvm { @@ -42,6 +45,10 @@ public: /// the function. void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; +#ifdef ARK_GC_SUPPORT + Triple::ArchType GetArkSupportTarget() const override; + int GetFixedFpPosition() const override; +#endif const StackProtectorRetLowering *getStackProtectorRet() const override; // OHOS_LOCAL diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7c5b5251aed33e833d999ed0053cf90fb74733aa..f4e9bd79041778102964f646410e39c6ea8a55fd 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6239,8 +6239,13 @@ SDValue AArch64TargetLowering::LowerCallResult( /// Return true if the calling convention is one that we can guarantee TCO for. static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { +#ifdef ARK_GC_SUPPORT + return ((CC == CallingConv::GHC || CC == CallingConv::Fast) && GuaranteeTailCalls) || + CC == CallingConv::Tail || CC == CallingConv::SwiftTail; +#else return (CC == CallingConv::Fast && GuaranteeTailCalls) || CC == CallingConv::Tail || CC == CallingConv::SwiftTail; +#endif } /// Return true if we might ever do TCO for calls with this calling convention. 
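Editorial note, not part of the patch: the hunk above (and the matching GlobalISel change later in the patch) widens guaranteed tail-call optimization from fastcc alone to fastcc-or-GHC when ARK_GC_SUPPORT is enabled, while Tail and SwiftTail keep their unconditional guarantee. A minimal standalone sketch of the resulting predicate, assuming the standard LLVM CallingConv IDs; the function name is illustrative only:

#include "llvm/IR/CallingConv.h"

// Under ARK_GC_SUPPORT, -tailcallopt also guarantees TCO for GHC-convention
// calls (GHC is treated like fastcc here); Tail and SwiftTail are always
// guaranteed.
static bool canGuaranteeTCOForArk(llvm::CallingConv::ID CC,
                                  bool GuaranteeTailCalls) {
  using namespace llvm::CallingConv;
  return ((CC == GHC || CC == Fast) && GuaranteeTailCalls) ||
         CC == Tail || CC == SwiftTail;
}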
@@ -6491,8 +6496,13 @@ SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const { +#ifdef ARK_GC_SUPPORT + return ((CallCC == CallingConv::GHC || CallCC == CallingConv::Fast) && TailCallOpt) || + CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail; +#else return (CallCC == CallingConv::Fast && TailCallOpt) || CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail; +#endif } // Check if the value is zero-extended from i1 to i8 diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 95294679f3eed1a5a4a3a03af67eacd1b7c1565b..49c45cec1e911469029559c5a30c6f656d14a7b5 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -354,6 +354,17 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (TFI->hasFP(MF) || TT.isOSDarwin()) markSuperRegs(Reserved, AArch64::W29); +#ifdef ARK_GC_SUPPORT + if (MF.getFunction().getCallingConv() == CallingConv::GHC) { + markSuperRegs(Reserved, AArch64::W29); + markSuperRegs(Reserved, AArch64::W30); + } + if ((MF.getFunction().getCallingConv() == CallingConv::WebKit_JS) || + (MF.getFunction().getCallingConv() == CallingConv::C)) { + markSuperRegs(Reserved, AArch64::W30); + } +#endif + for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) { if (MF.getSubtarget().isXRegisterReserved(i)) markSuperRegs(Reserved, AArch64::GPR32commonRegClass.getRegister(i)); diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt index 178b9eb34643c695f226cb3644b3ca83486fa883..b4ff802e7c867cebb6ce4cdfb062e3b56cfe4b61 100644 --- a/llvm/lib/Target/AArch64/CMakeLists.txt +++ b/llvm/lib/Target/AArch64/CMakeLists.txt @@ -1,7 +1,10 @@ add_llvm_component_group(AArch64 HAS_JIT) -set(LLVM_TARGET_DEFINITIONS AArch64.td) - +if(BUILD_ARK_GC_SUPPORT) + set(LLVM_TARGET_DEFINITIONS AArch64ArkGc.td) +else() + set(LLVM_TARGET_DEFINITIONS AArch64.td) +endif(BUILD_ARK_GC_SUPPORT) tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer) tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index 48277a8504115d0c5d02c27c095d8db18fb2453f..62bf5f439fe6f8059e23bd9193aff7d662255d9f 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -341,8 +341,13 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { } // namespace static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) { +#ifdef ARK_GC_SUPPORT + return ((CallConv == CallingConv::GHC || CallConv == CallingConv::Fast) && TailCallOpt) || + CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; +#else return (CallConv == CallingConv::Fast && TailCallOpt) || CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; +#endif } bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, @@ -662,8 +667,13 @@ bool AArch64CallLowering::lowerFormalArguments( /// Return true if the calling convention is one that we can guarantee TCO for. 
 static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
+#ifdef ARK_GC_SUPPORT
+  return ((CC == CallingConv::GHC || CC == CallingConv::Fast) && GuaranteeTailCalls) ||
+         CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
+#else
   return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
          CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
+#endif
 }
 
 /// Return true if we might ever do TCO for calls with this calling convention.
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index f006386f7508dfede892ebfd15af3da669fcc967..6ba99e07fd70ff80f77f85662e50c3b89f3b8612 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -1470,6 +1470,37 @@ bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
   - for 32-bit code, substitute %e?? registers for %r??
 */
 
+#ifdef ARK_GC_SUPPORT
+Triple::ArchType X86FrameLowering::GetArkSupportTarget() const
+{
+    return Is64Bit ? Triple::x86_64 : Triple::x86;
+}
+
+int X86FrameLowering::GetFixedFpPosition() const
+{
+    return 2;
+}
+
+int X86FrameLowering::GetFrameReserveSize(MachineFunction &MF) const
+{
+    int slotSize = sizeof(uint64_t);
+    if (!Is64Bit) {
+        slotSize = sizeof(uint32_t);
+    }
+    int reserveSize = 0;
+    MF.getFunction()
+        .getFnAttribute("frame-reserved-slots")
+        .getValueAsString()
+        .getAsInteger(10, reserveSize);
+
+    // x86-64 should align the reservation to 16 bytes
+    if (Is64Bit) {
+        return RoundUp(reserveSize, 2 * sizeof(uint64_t));
+    }
+    return reserveSize;
+}
+#endif
+
 void X86FrameLowering::emitPrologue(MachineFunction &MF,
                                     MachineBasicBlock &MBB) const {
   assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
@@ -1749,6 +1780,20 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
     else
       MFI.setOffsetAdjustment(-StackSize);
   }
+#ifdef ARK_GC_SUPPORT
+  // Reserve the slots requested via the "frame-reserved-slots" attribute.
+  if (MF.getFunction().hasFnAttribute("frame-reserved-slots"))
+  {
+    unsigned StackPtr = TRI->getStackRegister();
+    int reserveSize = GetFrameReserveSize(MF);
+    const unsigned SUBOpc =
+        getSUBriOpcode(Uses64BitFramePtr, reserveSize);
+    BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr)
+        .addReg(StackPtr)
+        .addImm(reserveSize)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+#endif
 
   // For EH funclets, only allocate enough space for outgoing calls. Save the
   // NumBytes value that we would've used for the parent frame.
@@ -2215,6 +2260,22 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
   // AfterPop is the position to insert .cfi_restore.
   MachineBasicBlock::iterator AfterPop = MBBI;
   if (HasFP) {
+#ifdef ARK_GC_SUPPORT
+    if (MF.getFunction().hasFnAttribute("frame-reserved-slots"))
+    {
+
+      int reserveSize = GetFrameReserveSize(MF);
+      int slotSize = sizeof(uint32_t);
+      if (Is64Bit) {
+        slotSize = sizeof(uint64_t);
+      }
+      for (int i = 0; i < reserveSize / slotSize; i++) {
+        BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
+                MachineFramePtr)
+            .setMIFlag(MachineInstr::FrameDestroy);
+      }
+    }
+#endif
     if (X86FI->hasSwiftAsyncContext()) {
       // Discard the context.
       int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
@@ -2639,6 +2700,12 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
     }
   }
 
+#ifdef ARK_GC_SUPPORT
+  int reserveSize = GetFrameReserveSize(MF);
+  SpillSlotOffset -= reserveSize; // skip the frame-reserved slots
+  CalleeSavedFrameSize += reserveSize;
+#endif
+
   // Assign slots for GPRs. It increases frame size.
for (CalleeSavedInfo &I : llvm::reverse(CSI)) { Register Reg = I.getReg(); diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h index 13176a290bbef2527b4fc322cc387fb5919c57cf..76f82910d768fd83c4728e942b24b8ab6de2d6df 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -74,6 +74,11 @@ public: /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. +#ifdef ARK_GC_SUPPORT + Triple::ArchType GetArkSupportTarget() const override; + int GetFixedFpPosition() const override; + int GetFrameReserveSize(MachineFunction &MF) const override; +#endif void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a75ee58ad7f75ebaa0df7aaae7b8c877a48bbb6f..c22915ce3fbf852854fa2b9ba253d0846f1f1ea3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4400,8 +4400,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, report_fatal_error("failed to perform tail call elimination on a call " "site marked musttail"); +#ifdef ARK_GC_SUPPORT + assert(!(isVarArg && canGuaranteeTCO(CallConv) && (CallConv != CallingConv::GHC)) && + "Var args not supported with calling convention fastcc, ghc or hipe"); +#else assert(!(isVarArg && canGuaranteeTCO(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"); +#endif // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index abde64b61b85ec09ef8464f07128785a36550c32..6b4acbcf70ad12f0356f5a6dcc5098d65633ee84 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -611,6 +611,12 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP)) Reserved.set(SubReg); } +#ifdef ARK_GC_SUPPORT + if (MF.getFunction().getCallingConv() == CallingConv::GHC) { + for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP)) + Reserved.set(SubReg); + } +#endif // Set the base-pointer register and its aliases as reserved if needed. if (hasBasePointer(MF)) {
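Editorial note, not part of the patch: the X86 frame-lowering changes above size a reserved region from the string function attribute "frame-reserved-slots" (parsed as a base-10 byte count and, on x86-64, rounded up to a 16-byte multiple) and carve it out next to the callee-save area. A minimal sketch of how an IR producer might request such a reservation; the helper name and the byte count in the usage comment are illustrative only:

#include <string>
#include "llvm/IR/Function.h"

// Attach the attribute that X86FrameLowering::GetFrameReserveSize() reads;
// the value must be a decimal string because it is parsed with
// getAsInteger(10, ...).
static void requestArkReservedFrameBytes(llvm::Function &F, unsigned Bytes) {
  F.addFnAttr("frame-reserved-slots", std::to_string(Bytes));
}

// Example usage (illustrative): reserve 16 bytes in some llvm::Function F.
// requestArkReservedFrameBytes(F, 16);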