diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 3d6f5e6f9d3dae03274317937162125faa537f71..2bbdfcae6ae8fed17ad69c324adc53804899f10f 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -640,6 +640,11 @@ option(LLVM_BUILD_RUNTIME "Build the LLVM runtime libraries." ON)
 option(LLVM_BUILD_EXAMPLES
   "Build the LLVM example programs. If OFF, just generate build targets." OFF)
+option(BUILD_ARK_GC_SUPPORT
+  "Enable Ark GC support. If ON, ARK_GC_SUPPORT is defined." OFF)
+if(BUILD_ARK_GC_SUPPORT)
+  add_definitions(-DARK_GC_SUPPORT)
+endif(BUILD_ARK_GC_SUPPORT)
 option(LLVM_INCLUDE_EXAMPLES "Generate build targets for the LLVM examples" ON)

 if(LLVM_BUILD_EXAMPLES)
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index bb9e872b6ec5ca385c659998d363c63a789753ae..dc54d265f4907ee2bfff94a185e97636db45f951 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -4034,6 +4034,12 @@ LLVM_ATTRIBUTE_C_DEPRECATED(
 LLVMValueRef LLVMBuildCall2(LLVMBuilderRef, LLVMTypeRef, LLVMValueRef Fn,
                             LLVMValueRef *Args, unsigned NumArgs,
                             const char *Name);
+#ifdef ARK_GC_SUPPORT
+LLVMValueRef LLVMBuildCall3(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn,
+                            LLVMValueRef *Args, unsigned NumArgs,
+                            const char *Name, LLVMValueRef *deoptVals,
+                            int NumVals);
+#endif
 LLVMValueRef LLVMBuildSelect(LLVMBuilderRef, LLVMValueRef If,
                              LLVMValueRef Then, LLVMValueRef Else,
                              const char *Name);
diff --git a/llvm/include/llvm-c/ExecutionEngine.h b/llvm/include/llvm-c/ExecutionEngine.h
index c5fc9bdb4d07f62462c65924e6ae8faf75748dec..ccd4e5164165ef040e3ff07957d79fa2d2877ce9 100644
--- a/llvm/include/llvm-c/ExecutionEngine.h
+++ b/llvm/include/llvm-c/ExecutionEngine.h
@@ -42,6 +42,9 @@ typedef struct LLVMOpaqueMCJITMemoryManager *LLVMMCJITMemoryManagerRef;

 struct LLVMMCJITCompilerOptions {
   unsigned OptLevel;
+#ifdef ARK_GC_SUPPORT
+  LLVMRelocMode RelMode;
+#endif
   LLVMCodeModel CodeModel;
   LLVMBool NoFramePointerElim;
   LLVMBool EnableFastISel;
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 6cd595e87e6445c021a97d638a14965f8ec72be7..2e96fcd0415dce5d7ed5bf63c7e01f96bc15a18b 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -18,6 +18,9 @@
 #include "llvm/Support/TypeSize.h"
 #include "llvm/IR/CallingConv.h" // OHOS_LOCAL
 #include <vector>
+#ifdef ARK_GC_SUPPORT
+#include "llvm/ADT/Triple.h"
+#endif

 namespace llvm {
   class BitVector;
@@ -222,6 +225,26 @@ public:
   /// emitZeroCallUsedRegs - Zeros out call used registers.
   virtual void emitZeroCallUsedRegs(BitVector RegsToZero,
                                     MachineBasicBlock &MBB) const {}
+  #ifdef ARK_GC_SUPPORT
+  template <class T>
+  constexpr T RoundUp(T x, size_t n) const
+  {
+    static_assert(std::is_integral<T>::value, "T must be integral");
+    return (static_cast<size_t>(x) + n - 1U) & (-n);
+  }
+
+  virtual Triple::ArchType GetArkSupportTarget() const
+  {
+    return Triple::UnknownArch;
+  }
+
+  virtual int GetFixedFpPosition() const
+  {
+    return 2;
+  }
+
+  virtual int GetFrameReserveSize(MachineFunction &MF) const;
+  #endif

   /// OHOS_LOCAL begin
   /// Instances about backward cfi and stack protection provided by different architectures.
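Reviewer note (illustrative only, not part of the patch): the snippet below sketches how a client of the C API could use the new LLVMBuildCall3 entry point declared above to attach deoptimization state to a call as a "deopt" operand bundle, assuming headers built with ARK_GC_SUPPORT. Module, function, and value names ("ark_demo", "runtime_call", "caller", the constant 42) are placeholders.

#include "llvm-c/Core.h"

int main(void) {
  LLVMContextRef Ctx = LLVMContextCreate();
  LLVMModuleRef Mod = LLVMModuleCreateWithNameInContext("ark_demo", Ctx);
  LLVMBuilderRef Builder = LLVMCreateBuilderInContext(Ctx);

  /* i64 (i64) -- stands in for a runtime call that may trigger deoptimization. */
  LLVMTypeRef I64 = LLVMInt64TypeInContext(Ctx);
  LLVMTypeRef FnTy = LLVMFunctionType(I64, &I64, 1, 0);
  LLVMValueRef Callee = LLVMAddFunction(Mod, "runtime_call", FnTy);

  LLVMValueRef Caller = LLVMAddFunction(Mod, "caller", FnTy);
  LLVMBasicBlockRef Entry = LLVMAppendBasicBlockInContext(Ctx, Caller, "entry");
  LLVMPositionBuilderAtEnd(Builder, Entry);

  LLVMValueRef Arg = LLVMGetParam(Caller, 0);
#ifdef ARK_GC_SUPPORT
  /* Values the GC/deoptimizer must be able to reconstruct at this call site;
     they end up in a "deopt" operand bundle on the emitted call. */
  LLVMValueRef DeoptVals[1] = {LLVMConstInt(I64, 42, 0)};
  LLVMValueRef Call =
      LLVMBuildCall3(Builder, FnTy, Callee, &Arg, 1, "call", DeoptVals, 1);
#else
  LLVMValueRef Call = LLVMBuildCall2(Builder, FnTy, Callee, &Arg, 1, "call");
#endif
  LLVMBuildRet(Builder, Call);

  LLVMDumpModule(Mod); /* the call prints roughly:
                          %call = call i64 @runtime_call(i64 %0) [ "deopt"(i64 42) ] */
  LLVMDisposeBuilder(Builder);
  LLVMDisposeModule(Mod);
  LLVMContextDispose(Ctx);
  return 0;
}

The bundle is what the statepoint/stackmap machinery later in this patch consumes; without ARK_GC_SUPPORT the same code falls back to the stock LLVMBuildCall2.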
diff --git a/llvm/include/llvm/IR/LegacyPassManager.h b/llvm/include/llvm/IR/LegacyPassManager.h index b3a4820ba0e492b10bf3751688212ef1f7d155b8..2d82c2561cebbbaaf4252758fa261a5333fbb957 100644 --- a/llvm/include/llvm/IR/LegacyPassManager.h +++ b/llvm/include/llvm/IR/LegacyPassManager.h @@ -16,6 +16,9 @@ #ifndef LLVM_IR_LEGACYPASSMANAGER_H #define LLVM_IR_LEGACYPASSMANAGER_H +#ifdef ARK_GC_SUPPORT +#include "llvm/Pass.h" +#endif #include "llvm/Support/CBindingWrapping.h" namespace llvm { diff --git a/llvm/include/llvm/Target/CodeGenCWrappers.h b/llvm/include/llvm/Target/CodeGenCWrappers.h index a995463570535d04ccb0c378639c076760b88c73..5929c7efe2126d35ce3b07117042c4745e5d96cb 100644 --- a/llvm/include/llvm/Target/CodeGenCWrappers.h +++ b/llvm/include/llvm/Target/CodeGenCWrappers.h @@ -59,6 +59,37 @@ inline LLVMCodeModel wrap(CodeModel::Model Model) { } llvm_unreachable("Bad CodeModel!"); } + +#ifdef ARK_GC_SUPPORT +inline Reloc::Model unwrap(LLVMRelocMode Model) { + switch (Model) { + case LLVMRelocDefault: + case LLVMRelocStatic: + return Reloc::Static; + case LLVMRelocPIC: + return Reloc::PIC_; + case LLVMRelocDynamicNoPic: + return Reloc::DynamicNoPIC; + } + llvm_unreachable("Invalid LLVMRelocMode!"); +} + +inline LLVMRelocMode unwrap(Reloc::Model Model) { + switch (Model) { + case Reloc::Static: + return LLVMRelocStatic; + case Reloc::PIC_: + return LLVMRelocPIC; + case Reloc::DynamicNoPIC: + return LLVMRelocDynamicNoPic; + case Reloc::ROPI: + case Reloc::RWPI: + case Reloc::ROPI_RWPI: + break; + } + llvm_unreachable("Invalid Reloc::Model!"); +} +#endif } // namespace llvm #endif diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 2cac1b55c3dda0af1bf6e7f3cd8fd8dc8aceb73d..5dbcd0facfd15df3f93b5923e1f015013c1c1c5f 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -68,6 +68,11 @@ #include #include +#ifdef ARK_GC_SUPPORT +#include +#include +#endif + using namespace llvm; #define DEBUG_TYPE "prologepilog" @@ -121,6 +126,9 @@ private: void calculateCallFrameInfo(MachineFunction &MF); void calculateSaveRestoreBlocks(MachineFunction &MF); +#ifdef ARK_GC_SUPPORT + void RecordCalleeSaveRegisterAndOffset(MachineFunction &MF, const std::vector &CSI); +#endif void spillCalleeSavedRegs(MachineFunction &MF); void calculateFrameObjectOffsets(MachineFunction &MF); @@ -314,6 +322,10 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { RestoreBlocks.clear(); MFI.setSavePoint(nullptr); MFI.setRestorePoint(nullptr); +#ifdef ARK_GC_SUPPORT + std::vector &CSI = MFI.getCalleeSavedInfo(); + RecordCalleeSaveRegisterAndOffset(MF, CSI); +#endif return true; } @@ -649,6 +661,69 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, } } +#ifdef ARK_GC_SUPPORT +void PEI::RecordCalleeSaveRegisterAndOffset(MachineFunction &MF, const std::vector &CSI) +{ + MachineModuleInfo &MMI = MF.getMMI(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); + Function &func = const_cast(MF.getFunction()); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + Triple::ArchType archType = TFI->GetArkSupportTarget(); + + if ((archType != Triple::aarch64 && archType != Triple::x86_64) || !(TFI->hasFP(MF))) { + return; + } + unsigned FpRegDwarfNum = 0; + if (archType == Triple::aarch64) { + FpRegDwarfNum = 29; // x29 + } else { + FpRegDwarfNum = 6; //rbp + } + int64_t FpOffset = 0; + int64_t deleta; + // nearest to rbp callee register + 
int64_t maxOffset = INT_MIN; + for (auto I : CSI) { + int64_t Offset = MFI.getObjectOffset(I.getFrameIdx()); + unsigned Reg = I.getReg(); + unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, true); + if (FpRegDwarfNum == DwarfRegNum) { + FpOffset = Offset; + } + maxOffset = std::max(Offset, maxOffset); + } + if (archType == Triple::x86_64) { + // rbp not existed in CSI + int64_t reseversize = TFI->GetFrameReserveSize(MF) + sizeof(uint64_t); // 1: rbp + deleta = maxOffset + reseversize; // nearest to rbp offset + } else { + deleta = FpOffset; + } + + const unsigned LinkRegDwarfNum = 30; + for (std::vector::const_iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); + unsigned Reg = I->getReg(); + unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, true); + if ((DwarfRegNum == LinkRegDwarfNum || DwarfRegNum == FpRegDwarfNum) + && (archType == Triple::aarch64)) { + continue; + } + Offset = Offset - deleta; + std::string key = std::string("DwarfReg") + std::to_string(DwarfRegNum); + std::string value = std::to_string(Offset); + LLVM_DEBUG(dbgs() << "RecordCalleeSaveRegisterAndOffset DwarfRegNum :" + << DwarfRegNum << " key:" << key + << " value:" << value + << "]\n"); + Attribute attr = Attribute::get(func.getContext(), key.c_str(), value.c_str()); + func.addAttributeAtIndex(AttributeList::FunctionIndex, attr); + } +} +#endif + void PEI::spillCalleeSavedRegs(MachineFunction &MF) { // We can't list this requirement in getRequiredProperties because some // targets (WebAssembly) use virtual registers past this point, and the pass @@ -937,6 +1012,88 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // stack area. int64_t FixedCSEnd = Offset; +#ifdef ARK_GC_SUPPORT + int CalleeSavedFrameSize = 0; + Triple::ArchType archType = TFI.GetArkSupportTarget(); + if (archType == Triple::aarch64 && TFI.hasFP(MF)) { + int fpPosition = TFI.GetFixedFpPosition(); + int slotSize = sizeof(uint64_t); + int fpToCallerSpDelta = 0; + // 0:not exist +:count from head -:count from tail + // for x86-64 + // +--------------------------+ + // | caller Frame | + // +--------------------------+--- + // | returnAddr | ^ + // +--------------------------+ 2 slot(fpToCallerSpDelta) + // | Fp | V fpPosition = 2 + // +--------------------------+--- + // | type | + // +--------------------------+ + // | ReServeSize | + // +--------------------------+ + // | R14 | + // +--------------------------+ + // | R13 | + // +--------------------------+ + // | R12 | + // +--------------------------+ + // | RBX | + // +--------------------------+ + // for ARM64 + // +--------------------------+ + // | caller Frame | + // +--------------------------+--- + // | callee save registers | ^ + // | (exclude Fp) | | + // | | callee save registers size(fpToCallerSpDelta) + // +--------------------------+ | + // | Fp | V fpPosition = -1 + // +--------------------------+--- FixedCSEnd + // | type | + // +--------------------------+ + // | ReServeSize | + // +--------------------------+ + if (fpPosition >= 0) { + fpToCallerSpDelta = fpPosition * slotSize; + } else { + fpToCallerSpDelta = FixedCSEnd + (fpPosition + 1) * slotSize; + } + Function &func = const_cast(MF.getFunction()); + Attribute attr = Attribute::get(func.getContext(), "fpToCallerSpDelta", std::to_string(fpToCallerSpDelta).c_str()); + func.addAttributeAtIndex(AttributeList::FunctionIndex, attr); + + CalleeSavedFrameSize = TFI.GetFrameReserveSize(MF); + Offset += CalleeSavedFrameSize; + } + + if ((archType == Triple::x86_64) 
&& TFI.hasFP(MF)) { + // Determine which of the registers in the callee save list should be saved. + int fpPosition = TFI.GetFixedFpPosition(); + int fpToCallerSpDelta = 0; + int slotSize = sizeof(uint64_t); + if (fpPosition >= 0) { + fpToCallerSpDelta = fpPosition * slotSize; + } else { + fpToCallerSpDelta = FixedCSEnd + (fpPosition + 1) * slotSize; + } + Function &func = const_cast(MF.getFunction()); + Attribute attr = Attribute::get(func.getContext(), "fpToCallerSpDelta", std::to_string(fpToCallerSpDelta).c_str()); + func.addAttributeAtIndex(AttributeList::FunctionIndex, attr); + + CalleeSavedFrameSize = TFI.GetFrameReserveSize(MF); + std::vector &CSI = MFI.getCalleeSavedInfo(); + LLVM_DEBUG(dbgs() << " CSI size: " << CSI.size() << " CalleeSavedFrameSize " << CalleeSavedFrameSize << "\n"); + // if callee-saved is empty, the reserved-size can't be passed to the computation of local zone + // because the assignCalleeSavedSpillSlots() directly return. + // Otherwise, the reserved-size don't need to add to the computation of local zone because it has been considered + // while computing the offsets of callee-saved-zone that will be passed to the computation of local-zone + if (CSI.empty()) { + Offset += CalleeSavedFrameSize; + } + } +#endif + // Make sure the special register scavenging spill slot is closest to the // incoming stack pointer if a frame pointer is required and is closer // to the incoming rather than the final stack pointer. diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp index ccaff862fa3f3852e372c1c8f661293e226ee828..9254f3af5add64cde34d188804ddda4e575589f9 100644 --- a/llvm/lib/CodeGen/StackMaps.cpp +++ b/llvm/lib/CodeGen/StackMaps.cpp @@ -29,6 +29,9 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#ifdef ARK_GC_SUPPORT +#include "llvm/Target/TargetMachine.h" +#endif #include #include #include @@ -599,10 +602,11 @@ void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) { // Function Frame records. 
  LLVM_DEBUG(dbgs() << WSMP << "functions:\n");
  for (auto const &FR : FnInfos) {
-    LLVM_DEBUG(dbgs() << WSMP << "function addr: " << FR.first
-                      << " frame size: " << FR.second.StackSize
-                      << " callsite count: " << FR.second.RecordCount << '\n');
-    OS.emitSymbolValue(FR.first, 8);
+  #ifdef ARK_GC_SUPPORT
+    OS.emitSymbolValue(FR.first, AP.TM.getProgramPointerSize());
+  #else
+    OS.emitSymbolValue(FR.first, 8);
+  #endif
     OS.emitIntValue(FR.second.StackSize, 8);
     OS.emitIntValue(FR.second.RecordCount, 8);
   }
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 0007c44b859ca2c94c3ac8c79b8bcb723e71e890..0e026bdf42f12e902b26ca7c0a53d4a9bf7a3d8e 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -179,3 +179,15 @@ TargetFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
   return DwarfFrameBase{DwarfFrameBase::Register, {RI->getFrameRegister(MF)}};
 }
+
+#ifdef ARK_GC_SUPPORT
+int TargetFrameLowering::GetFrameReserveSize(MachineFunction &MF) const
+{
+  int64_t reserveSize = 0;
+  MF.getFunction()
+      .getFnAttribute("frame-reserved-slots")
+      .getValueAsString()
+      .getAsInteger(10, reserveSize);
+  return reserveSize;
+}
+#endif
diff --git a/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 672fd7b991c25a6d144566324d485d156942a748..b4bbadc7f5e50a100b9168fe9d55619748a3f444 100644
--- a/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -197,6 +197,9 @@ LLVMBool LLVMCreateMCJITCompilerForModule(
   builder.setEngineKind(EngineKind::JIT)
          .setErrorStr(&Error)
          .setOptLevel((CodeGenOpt::Level)options.OptLevel)
+#ifdef ARK_GC_SUPPORT
+         .setRelocationModel(unwrap(options.RelMode))
+#endif
          .setTargetOptions(targetOptions);
   bool JIT;
   if (Optional<CodeModel::Model> CM = unwrap(options.CodeModel, JIT))
diff --git a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
index f1eeee3b3599d236c90d1783813f4eca3d78dfc1..8ed1b504a029ca9dc661c0de96db9a2999b7ff90 100644
--- a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
+++ b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
@@ -54,7 +54,22 @@ extern "C" {
 }

 namespace {
-
+#ifdef ARK_GC_SUPPORT
+// We put information about the JITed function in this global, which the
+// debugger reads. Make sure to specify the version statically, because the
+// debugger checks the version before we can set it during runtime.
+struct jit_descriptor __jit_debug_descriptor = {1, 0, nullptr, nullptr};
+
+// Debuggers that implement the GDB JIT interface put a special breakpoint in
+// this function.
+LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() {
+  // The noinline and the asm prevent calls to this function from being
+  // optimized out.
+#if !defined(_MSC_VER)
+  asm volatile("" ::: "memory");
+#endif
+}
+#endif
 // FIXME: lli aims to provide both, RuntimeDyld and JITLink, as the dynamic
 // loaders for it's JIT implementations. And they both offer debugging via the
 // GDB JIT interface, which builds on the two well-known symbol names below.
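Reviewer note (illustrative only, not part of the patch): GetFrameReserveSize above reads the reserve size from the "frame-reserved-slots" string attribute, and the PrologEpilogInserter changes publish their results through the same mechanism ("fpToCallerSpDelta" and per-register "DwarfReg<N>" offsets). The sketch below shows that producer/consumer convention on a plain IR function using the regular C++ API; the function name "compiled_method" and the value 16 are placeholders.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("ark_attr_demo", Ctx);

  FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "compiled_method", &M);

  // Producer side (e.g. the Ark front end): request a reserve of 16 bytes
  // (placeholder value) by attaching the string attribute the hook parses.
  F->addFnAttr("frame-reserved-slots", "16");

  // Consumer side: this mirrors what TargetFrameLowering::GetFrameReserveSize does.
  int64_t ReserveSize = 0;
  F->getFnAttribute("frame-reserved-slots")
      .getValueAsString()
      .getAsInteger(10, ReserveSize);

  // After PEI runs, callee-save offsets would appear as "DwarfReg<N>" string
  // attributes on the same function and can be read back the same way.
  outs() << "frame-reserved-slots = " << ReserveSize << "\n";
  return 0;
}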
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index 08b7b0e1f9560519600776016b6cbf8ec8ce85a4..1369e572cb663aad0198c952d76574a3d2221e11 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -3920,6 +3920,25 @@ LLVMValueRef LLVMBuildCall2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn, makeArrayRef(unwrap(Args), NumArgs), Name)); } +#ifdef ARK_GC_SUPPORT +LLVMValueRef LLVMBuildCall3(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn, + LLVMValueRef *Args, unsigned NumArgs, + const char *Name, LLVMValueRef *deoptVals, + int NumVals) { + FunctionType *FTy = unwrap(Ty); + std::vector vals; + for (int i = 0; i < NumVals; i++) { + vals.push_back(unwrap(deoptVals[i])); + } + OperandBundleDefT deoptBundle("deopt", vals); + + return wrap(unwrap(B)->CreateCall(FTy, unwrap(Fn), + makeArrayRef(unwrap(Args), NumArgs), // Args + {deoptBundle}, // ArrayRef + Name)); +} +#endif + LLVMValueRef LLVMBuildSelect(LLVMBuilderRef B, LLVMValueRef If, LLVMValueRef Then, LLVMValueRef Else, const char *Name) { diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp index bf1a809e56336b106b956cf1063ee179c2f61d3e..8f3973ec9d32eca5ca24b5b9d409998d0ce7a60e 100644 --- a/llvm/lib/IR/LLVMContext.cpp +++ b/llvm/lib/IR/LLVMContext.cpp @@ -52,6 +52,10 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { (void)ID; } +#ifdef ARK_GC_SUPPORT + setOpaquePointers(false); +#endif + auto *DeoptEntry = pImpl->getOrInsertBundleTag("deopt"); assert(DeoptEntry->second == LLVMContext::OB_deopt && "deopt operand bundle id drifted!"); diff --git a/llvm/lib/Target/AArch64/AArch64ArkGc.td b/llvm/lib/Target/AArch64/AArch64ArkGc.td new file mode 100644 index 0000000000000000000000000000000000000000..9615d02f4cec5f0d9a350d9e466adc7e0bb9112b --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64ArkGc.td @@ -0,0 +1,1291 @@ +//=- AArch64ArkGc.td - Describe the AArch64 Target Machine --------*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing. +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// AArch64 Subtarget features. +// + +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", + "Enable ARMv8 FP">; + +def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", + "Enable Advanced SIMD instructions", [FeatureFPARMv8]>; + +def FeatureSM4 : SubtargetFeature< + "sm4", "HasSM4", "true", + "Enable SM3 and SM4 support", [FeatureNEON]>; + +def FeatureSHA2 : SubtargetFeature< + "sha2", "HasSHA2", "true", + "Enable SHA1 and SHA256 support", [FeatureNEON]>; + +def FeatureSHA3 : SubtargetFeature< + "sha3", "HasSHA3", "true", + "Enable SHA512 and SHA3 support", [FeatureNEON, FeatureSHA2]>; + +def FeatureAES : SubtargetFeature< + "aes", "HasAES", "true", + "Enable AES support", [FeatureNEON]>; + +// Crypto has been split up and any combination is now valid (see the +// crypto definitions above). 
Also, crypto is now context sensitive: +// it has a different meaning for e.g. Armv8.4 than it has for Armv8.2. +// Therefore, we rely on Clang, the user interacing tool, to pass on the +// appropriate crypto options. But here in the backend, crypto has very little +// meaning anymore. We kept the Crypto definition here for backward +// compatibility, and now imply features SHA2 and AES, which was the +// "traditional" meaning of Crypto. +def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable cryptographic instructions", [FeatureNEON, FeatureSHA2, FeatureAES]>; + +def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", + "Enable ARMv8 CRC-32 checksum instructions">; + +def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", + "Enable ARMv8 Reliability, Availability and Serviceability Extensions">; + +def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true", + "Enable ARMv8.1 Large System Extension (LSE) atomic instructions">; + +def FeatureLSE2 : SubtargetFeature<"lse2", "HasLSE2", "true", + "Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules">; + +def FeatureLDAPR : SubtargetFeature<"ldapr", "HasLDAPR", "true", + "Use LDAPR to lower atomic loads; experimental until we " + "have more testing/a formal correctness proof">; + +def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true", + "Enable out of line atomics to support LSE instructions">; + +def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true", + "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">; + +def FeaturePAN : SubtargetFeature< + "pan", "HasPAN", "true", + "Enables ARM v8.1 Privileged Access-Never extension">; + +def FeatureLOR : SubtargetFeature< + "lor", "HasLOR", "true", + "Enables ARM v8.1 Limited Ordering Regions extension">; + +def FeatureCONTEXTIDREL2 : SubtargetFeature<"CONTEXTIDREL2", "HasCONTEXTIDREL2", + "true", "Enable RW operand CONTEXTIDR_EL2" >; + +def FeatureVH : SubtargetFeature<"vh", "HasVH", "true", + "Enables ARM v8.1 Virtual Host extension", [FeatureCONTEXTIDREL2] >; + +def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", + "Enable ARMv8 PMUv3 Performance Monitors extension">; + +def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", + "Full FP16", [FeatureFPARMv8]>; + +def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true", + "Enable FP16 FML instructions", [FeatureFullFP16]>; + +def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true", + "Enable Statistical Profiling extension">; + +def FeaturePAN_RWV : SubtargetFeature< + "pan-rwv", "HasPAN_RWV", "true", + "Enable v8.2 PAN s1e1R and s1e1W Variants", + [FeaturePAN]>; + +// UAO PState +def FeaturePsUAO : SubtargetFeature< "uaops", "HasPsUAO", "true", + "Enable v8.2 UAO PState">; + +def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP", + "true", "Enable v8.2 data Cache Clean to Point of Persistence" >; + +def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true", + "Enable Scalable Vector Extension (SVE) instructions", [FeatureFullFP16]>; + +// This flag is currently still labeled as Experimental, but when fully +// implemented this should tell the compiler to use the zeroing pseudos to +// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive +// lanes are known to be zero. The pseudos will then be expanded using the +// MOVPRFX instruction to zero the inactive lanes. 
This feature should only be +// enabled if MOVPRFX instructions are known to merge with the destructive +// operations they prefix. +// +// This feature could similarly be extended to support cheap merging of _any_ +// value into the inactive lanes using the MOVPRFX instruction that uses +// merging-predication. +def FeatureExperimentalZeroingPseudos + : SubtargetFeature<"use-experimental-zeroing-pseudos", + "UseExperimentalZeroingPseudos", "true", + "Hint to the compiler that the MOVPRFX instruction is " + "merged with destructive operations", + []>; + +def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl", + "UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">; + +def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true", + "Enable Scalable Vector Extension 2 (SVE2) instructions", + [FeatureSVE, FeatureUseScalarIncVL]>; + +def FeatureSVE2AES : SubtargetFeature<"sve2-aes", "HasSVE2AES", "true", + "Enable AES SVE2 instructions", [FeatureSVE2, FeatureAES]>; + +def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true", + "Enable SM4 SVE2 instructions", [FeatureSVE2, FeatureSM4]>; + +def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true", + "Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>; + +def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "true", + "Enable bit permutation SVE2 instructions", [FeatureSVE2]>; + +def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", + "Has zero-cycle register moves">; + +def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true", + "Has zero-cycle zeroing instructions for generic registers">; + +// It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0". +// as movi is more efficient across all cores. Newer cores can eliminate +// fmovs early and there is no difference with movi, but this not true for +// all implementations. +def FeatureNoZCZeroingFP : SubtargetFeature<"no-zcz-fp", "HasZeroCycleZeroingFP", "false", + "Has no zero-cycle zeroing instructions for FP registers">; + +def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", + "Has zero-cycle zeroing instructions", + [FeatureZCZeroingGP]>; + +/// ... but the floating-point version doesn't quite work in rare cases on older +/// CPUs. 
+def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround", + "HasZeroCycleZeroingFPWorkaround", "true", + "The zero-cycle floating-point zeroing instruction has a bug">; + +def FeatureStrictAlign : SubtargetFeature<"strict-align", + "RequiresStrictAlign", "true", + "Disallow all unaligned memory " + "access">; + +foreach i = {1-7,9-15,18,20-30} in // OHOS_LOCAL + def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true", + "Reserve X"#i#", making it unavailable " + "as a GPR">; + +foreach i = {8-15,18} in + def FeatureCallSavedX#i : SubtargetFeature<"call-saved-x"#i, + "CustomCallSavedXRegs["#i#"]", "true", "Make X"#i#" callee saved.">; + +def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps", + "true", + "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">; + +def FeaturePredictableSelectIsExpensive : SubtargetFeature< + "predictable-select-expensive", "PredictableSelectIsExpensive", "true", + "Prefer likely predicted branches over selects">; + +def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move", + "HasCustomCheapAsMoveHandling", "true", + "Use custom handling of cheap instructions">; + +def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move", + "HasExynosCheapAsMoveHandling", "true", + "Use Exynos specific handling of cheap instructions", + [FeatureCustomCheapAsMoveHandling]>; + +def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", + "UsePostRAScheduler", "true", "Schedule again after register allocation">; + +def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store", + "IsMisaligned128StoreSlow", "true", "Misaligned 128 bit stores are slow">; + +def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128", + "IsPaired128Slow", "true", "Paired 128 bit loads and stores are slow">; + +def FeatureAscendStoreAddress : SubtargetFeature<"ascend-store-address", + "IsStoreAddressAscend", "true", + "Schedule vector stores by ascending address">; + +def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow", + "true", "STR of Q register with register offset is slow">; + +def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature< + "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern", + "true", "Use alternative pattern for sextload convert to f32">; + +def FeatureArithmeticBccFusion : SubtargetFeature< + "arith-bcc-fusion", "HasArithmeticBccFusion", "true", + "CPU fuses arithmetic+bcc operations">; + +def FeatureArithmeticCbzFusion : SubtargetFeature< + "arith-cbz-fusion", "HasArithmeticCbzFusion", "true", + "CPU fuses arithmetic + cbz/cbnz operations">; + +def FeatureCmpBccFusion : SubtargetFeature< + "cmp-bcc-fusion", "HasCmpBccFusion", "true", + "CPU fuses cmp+bcc operations">; + +def FeatureFuseAddress : SubtargetFeature< + "fuse-address", "HasFuseAddress", "true", + "CPU fuses address generation and memory operations">; + +def FeatureFuseAES : SubtargetFeature< + "fuse-aes", "HasFuseAES", "true", + "CPU fuses AES crypto operations">; + +def FeatureFuseArithmeticLogic : SubtargetFeature< + "fuse-arith-logic", "HasFuseArithmeticLogic", "true", + "CPU fuses arithmetic and logic operations">; + +def FeatureFuseCCSelect : SubtargetFeature< + "fuse-csel", "HasFuseCCSelect", "true", + "CPU fuses conditional select operations">; + +def FeatureFuseCryptoEOR : SubtargetFeature< + "fuse-crypto-eor", "HasFuseCryptoEOR", "true", + "CPU fuses AES/PMULL and EOR operations">; + +def 
FeatureFuseAdrpAdd : SubtargetFeature< + "fuse-adrp-add", "HasFuseAdrpAdd", "true", + "CPU fuses adrp+add operations">; + +def FeatureFuseLiterals : SubtargetFeature< + "fuse-literals", "HasFuseLiterals", "true", + "CPU fuses literal generation operations">; + +def FeatureDisableLatencySchedHeuristic : SubtargetFeature< + "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", + "Disable latency scheduling heuristic">; + +def FeatureForce32BitJumpTables + : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true", + "Force jump table entries to be 32-bits wide except at MinSize">; + +def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true", + "Enable support for RCPC extension">; + +def FeatureUseRSqrt : SubtargetFeature< + "use-reciprocal-square-root", "UseRSqrt", "true", + "Use the reciprocal square root approximation">; + +def FeatureDotProd : SubtargetFeature< + "dotprod", "HasDotProd", "true", + "Enable dot product support">; + +def FeaturePAuth : SubtargetFeature< + "pauth", "HasPAuth", "true", + "Enable v8.3-A Pointer Authentication extension">; + +def FeatureJS : SubtargetFeature< + "jsconv", "HasJS", "true", + "Enable v8.3-A JavaScript FP conversion instructions", + [FeatureFPARMv8]>; + +def FeatureCCIDX : SubtargetFeature< + "ccidx", "HasCCIDX", "true", + "Enable v8.3-A Extend of the CCSIDR number of sets">; + +def FeatureComplxNum : SubtargetFeature< + "complxnum", "HasComplxNum", "true", + "Enable v8.3-A Floating-point complex number support", + [FeatureNEON]>; + +def FeatureNV : SubtargetFeature< + "nv", "HasNV", "true", + "Enable v8.4-A Nested Virtualization Enchancement">; + +def FeatureMPAM : SubtargetFeature< + "mpam", "HasMPAM", "true", + "Enable v8.4-A Memory system Partitioning and Monitoring extension">; + +def FeatureDIT : SubtargetFeature< + "dit", "HasDIT", "true", + "Enable v8.4-A Data Independent Timing instructions">; + +def FeatureTRACEV8_4 : SubtargetFeature< + "tracev8.4", "HasTRACEV8_4", "true", + "Enable v8.4-A Trace extension">; + +def FeatureAM : SubtargetFeature< + "am", "HasAM", "true", + "Enable v8.4-A Activity Monitors extension">; + +def FeatureAMVS : SubtargetFeature< + "amvs", "HasAMVS", "true", + "Enable v8.6-A Activity Monitors Virtualization support", + [FeatureAM]>; + +def FeatureSEL2 : SubtargetFeature< + "sel2", "HasSEL2", "true", + "Enable v8.4-A Secure Exception Level 2 extension">; + +def FeatureTLB_RMI : SubtargetFeature< + "tlb-rmi", "HasTLB_RMI", "true", + "Enable v8.4-A TLB Range and Maintenance Instructions">; + +def FeatureFlagM : SubtargetFeature< + "flagm", "HasFlagM", "true", + "Enable v8.4-A Flag Manipulation Instructions">; + +// 8.4 RCPC enchancements: LDAPR & STLR instructions with Immediate Offset +def FeatureRCPC_IMMO : SubtargetFeature<"rcpc-immo", "HasRCPC_IMMO", "true", + "Enable v8.4-A RCPC instructions with Immediate Offsets", + [FeatureRCPC]>; + +def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", + "NegativeImmediates", "false", + "Convert immediates and instructions " + "to their negated or complemented " + "equivalent when the immediate does " + "not fit in the encoding.">; + +def FeatureLSLFast : SubtargetFeature< + "lsl-fast", "HasLSLFast", "true", + "CPU has a fastpath logical shift of up to 3 places">; + +def FeatureAggressiveFMA : + SubtargetFeature<"aggressive-fma", + "HasAggressiveFMA", + "true", + "Enable Aggressive FMA for floating-point.">; + +def FeatureAltFPCmp : SubtargetFeature<"altnzcv", "HasAlternativeNZCV", "true", + "Enable alternative 
NZCV format for floating point comparisons">; + +def FeatureFRInt3264 : SubtargetFeature<"fptoint", "HasFRInt3264", "true", + "Enable FRInt[32|64][Z|X] instructions that round a floating-point number to " + "an integer (in FP format) forcing it to fit into a 32- or 64-bit int" >; + +def FeatureSpecRestrict : SubtargetFeature<"specrestrict", "HasSpecRestrict", + "true", "Enable architectural speculation restriction" >; + +def FeatureSB : SubtargetFeature<"sb", "HasSB", + "true", "Enable v8.5 Speculation Barrier" >; + +def FeatureSSBS : SubtargetFeature<"ssbs", "HasSSBS", + "true", "Enable Speculative Store Bypass Safe bit" >; + +def FeaturePredRes : SubtargetFeature<"predres", "HasPredRes", "true", + "Enable v8.5a execution and data prediction invalidation instructions" >; + +def FeatureCacheDeepPersist : SubtargetFeature<"ccdp", "HasCCDP", + "true", "Enable v8.5 Cache Clean to Point of Deep Persistence" >; + +def FeatureBranchTargetId : SubtargetFeature<"bti", "HasBTI", + "true", "Enable Branch Target Identification" >; + +def FeatureRandGen : SubtargetFeature<"rand", "HasRandGen", + "true", "Enable Random Number generation instructions" >; + +def FeatureMTE : SubtargetFeature<"mte", "HasMTE", + "true", "Enable Memory Tagging Extension" >; + +def FeatureTRBE : SubtargetFeature<"trbe", "HasTRBE", + "true", "Enable Trace Buffer Extension">; + +def FeatureETE : SubtargetFeature<"ete", "HasETE", + "true", "Enable Embedded Trace Extension", + [FeatureTRBE]>; + +def FeatureTME : SubtargetFeature<"tme", "HasTME", + "true", "Enable Transactional Memory Extension" >; + +def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals", + "AllowTaggedGlobals", + "true", "Use an instruction sequence for taking the address of a global " + "that allows a memory tag in the upper address bits">; + +def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", + "true", "Enable BFloat16 Extension" >; + +def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8", + "true", "Enable Matrix Multiply Int8 Extension">; + +def FeatureMatMulFP32 : SubtargetFeature<"f32mm", "HasMatMulFP32", + "true", "Enable Matrix Multiply FP32 Extension", [FeatureSVE]>; + +def FeatureMatMulFP64 : SubtargetFeature<"f64mm", "HasMatMulFP64", + "true", "Enable Matrix Multiply FP64 Extension", [FeatureSVE]>; + +def FeatureXS : SubtargetFeature<"xs", "HasXS", + "true", "Enable Armv8.7-A limited-TLB-maintenance instruction">; + +def FeatureWFxT : SubtargetFeature<"wfxt", "HasWFxT", + "true", "Enable Armv8.7-A WFET and WFIT instruction">; + +def FeatureHCX : SubtargetFeature< + "hcx", "HasHCX", "true", "Enable Armv8.7-A HCRX_EL2 system register">; + +def FeatureLS64 : SubtargetFeature<"ls64", "HasLS64", + "true", "Enable Armv8.7-A LD64B/ST64B Accelerator Extension">; + +def FeatureHBC : SubtargetFeature<"hbc", "HasHBC", + "true", "Enable Armv8.8-A Hinted Conditional Branches Extension">; + +def FeatureMOPS : SubtargetFeature<"mops", "HasMOPS", + "true", "Enable Armv8.8-A memcpy and memset acceleration instructions">; + +def FeatureBRBE : SubtargetFeature<"brbe", "HasBRBE", + "true", "Enable Branch Record Buffer Extension">; + +def FeatureSPE_EEF : SubtargetFeature<"spe-eef", "HasSPE_EEF", + "true", "Enable extra register in the Statistical Profiling Extension">; + +def FeatureFineGrainedTraps : SubtargetFeature<"fgt", "HasFineGrainedTraps", + "true", "Enable fine grained virtualization traps extension">; + +def FeatureEnhancedCounterVirtualization : + SubtargetFeature<"ecv", "HasEnhancedCounterVirtualization", + "true", "Enable enhanced 
counter virtualization extension">; + +def FeatureRME : SubtargetFeature<"rme", "HasRME", + "true", "Enable Realm Management Extension">; + +def FeatureSME : SubtargetFeature<"sme", "HasSME", "true", + "Enable Scalable Matrix Extension (SME)", [FeatureBF16, FeatureUseScalarIncVL]>; + +def FeatureSMEF64 : SubtargetFeature<"sme-f64", "HasSMEF64", "true", + "Enable Scalable Matrix Extension (SME) F64F64 instructions", [FeatureSME]>; + +def FeatureSMEI64 : SubtargetFeature<"sme-i64", "HasSMEI64", "true", + "Enable Scalable Matrix Extension (SME) I16I64 instructions", [FeatureSME]>; + +def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true", + "Apple A7 (the CPU formerly known as Cyclone)">; + +def FeatureEL2VMSA : SubtargetFeature<"el2vmsa", "HasEL2VMSA", "true", + "Enable Exception Level 2 Virtual Memory System Architecture">; + +def FeatureEL3 : SubtargetFeature<"el3", "HasEL3", "true", + "Enable Exception Level 3">; + +def FeatureFixCortexA53_835769 : SubtargetFeature<"fix-cortex-a53-835769", + "FixCortexA53_835769", "true", "Mitigate Cortex-A53 Erratum 835769">; + +def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice", + "NoBTIAtReturnTwice", "true", + "Don't place a BTI instruction " + "after a return-twice">; + +//===----------------------------------------------------------------------===// +// Architectures. +// +def HasV8_0aOps : SubtargetFeature<"v8a", "HasV8_0aOps", "true", + "Support ARM v8.0a instructions", [FeatureEL2VMSA, FeatureEL3]>; + +def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", + "Support ARM v8.1a instructions", [HasV8_0aOps, FeatureCRC, FeatureLSE, + FeatureRDM, FeaturePAN, FeatureLOR, FeatureVH]>; + +def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", + "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO, + FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>; + +def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", + "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePAuth, + FeatureJS, FeatureCCIDX, FeatureComplxNum]>; + +def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", + "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd, + FeatureNV, FeatureMPAM, FeatureDIT, + FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI, + FeatureFlagM, FeatureRCPC_IMMO, FeatureLSE2]>; + +def HasV8_5aOps : SubtargetFeature< + "v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions", + [HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict, + FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist, + FeatureBranchTargetId]>; + +def HasV8_6aOps : SubtargetFeature< + "v8.6a", "HasV8_6aOps", "true", "Support ARM v8.6a instructions", + [HasV8_5aOps, FeatureAMVS, FeatureBF16, FeatureFineGrainedTraps, + FeatureEnhancedCounterVirtualization, FeatureMatMulInt8]>; + +def HasV8_7aOps : SubtargetFeature< + "v8.7a", "HasV8_7aOps", "true", "Support ARM v8.7a instructions", + [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX]>; + +def HasV8_8aOps : SubtargetFeature< + "v8.8a", "HasV8_8aOps", "true", "Support ARM v8.8a instructions", + [HasV8_7aOps, FeatureHBC, FeatureMOPS]>; + +def HasV9_0aOps : SubtargetFeature< + "v9a", "HasV9_0aOps", "true", "Support ARM v9a instructions", + [HasV8_5aOps, FeatureSVE2]>; + +def HasV9_1aOps : SubtargetFeature< + "v9.1a", "HasV9_1aOps", "true", "Support ARM v9.1a instructions", + [HasV8_6aOps, HasV9_0aOps]>; + +def HasV9_2aOps : SubtargetFeature< + "v9.2a", "HasV9_2aOps", "true", "Support ARM 
v9.2a instructions", + [HasV8_7aOps, HasV9_1aOps]>; + +def HasV9_3aOps : SubtargetFeature< + "v9.3a", "HasV9_3aOps", "true", "Support ARM v9.3a instructions", + [HasV8_8aOps, HasV9_2aOps]>; + +def HasV8_0rOps : SubtargetFeature< + "v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions", + [//v8.1 + FeatureCRC, FeaturePAN, FeatureRDM, FeatureLSE, FeatureCONTEXTIDREL2, + //v8.2 + FeatureRAS, FeaturePsUAO, FeatureCCPP, FeaturePAN_RWV, + //v8.3 + FeatureComplxNum, FeatureCCIDX, FeatureJS, + FeaturePAuth, FeatureRCPC, + //v8.4 + FeatureDotProd, FeatureTRACEV8_4, FeatureTLB_RMI, + FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO, + // Not mandatory in v8.0-R, but included here on the grounds that it + // only enables names of system registers + FeatureSpecRestrict + ]>; + +// Only intended to be used by disassemblers. +def FeatureAll + : SubtargetFeature<"all", "IsAll", "true", "Enable all instructions", []>; + +class AssemblerPredicateWithAll + : AssemblerPredicate<(any_of FeatureAll, cond), name>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "AArch64RegisterInfo.td" +include "AArch64RegisterBanks.td" +include "AArch64ArkGcCallingConvention.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "AArch64Schedule.td" +include "AArch64InstrInfo.td" +include "AArch64SchedPredicates.td" +include "AArch64SchedPredExynos.td" +include "AArch64SchedPredAmpere.td" +include "AArch64Combine.td" + +def AArch64InstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// Named operands for MRS/MSR/TLBI/... +//===----------------------------------------------------------------------===// + +include "AArch64SystemOperands.td" + +//===----------------------------------------------------------------------===// +// Access to privileged registers +//===----------------------------------------------------------------------===// + +foreach i = 1-3 in +def FeatureUseEL#i#ForTP : SubtargetFeature<"tpidr-el"#i, "UseEL"#i#"ForTP", + "true", "Permit use of TPIDR_EL"#i#" for the TLS base">; + +//===----------------------------------------------------------------------===// +// Control codegen mitigation against Straight Line Speculation vulnerability. +//===----------------------------------------------------------------------===// + +def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr", + "HardenSlsRetBr", "true", + "Harden against straight line speculation across RET and BR instructions">; +def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr", + "HardenSlsBlr", "true", + "Harden against straight line speculation across BLR instructions">; +def FeatureHardenSlsNoComdat : SubtargetFeature<"harden-sls-nocomdat", + "HardenSlsNoComdat", "true", + "Generate thunk code for SLS mitigation in the normal text section">; + +//===----------------------------------------------------------------------===// +// AArch64 Processors supported. 
+// + +//===----------------------------------------------------------------------===// +// Unsupported features to disable for scheduling models +//===----------------------------------------------------------------------===// + +class AArch64Unsupported { list F; } + +def SVEUnsupported : AArch64Unsupported { + let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, + HasSVE2BitPerm, HasSVEorSME, HasSVE2orSME]; +} + +def PAUnsupported : AArch64Unsupported { + let F = [HasPAuth]; +} + +def SMEUnsupported : AArch64Unsupported { + let F = [HasSME, HasSMEF64, HasSMEI64]; +} + +include "AArch64SchedA53.td" +include "AArch64SchedA55.td" +include "AArch64SchedA57.td" +include "AArch64SchedCyclone.td" +include "AArch64SchedFalkor.td" +include "AArch64SchedKryo.td" +include "AArch64SchedExynosM3.td" +include "AArch64SchedExynosM4.td" +include "AArch64SchedExynosM5.td" +include "AArch64SchedThunderX.td" +include "AArch64SchedThunderX2T99.td" +include "AArch64SchedA64FX.td" +include "AArch64SchedThunderX3T110.td" +include "AArch64SchedTSV110.td" +include "AArch64SchedAmpere1.td" +include "AArch64SchedNeoverseN2.td" + +def TuneA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", + "Cortex-A35 ARM processors">; + +def TuneA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", + "Cortex-A53 ARM processors", [ + FeatureFuseAES, + FeatureBalanceFPOps, + FeatureCustomCheapAsMoveHandling, + FeaturePostRAScheduler]>; + +def TuneA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", + "Cortex-A55 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureFuseAddress]>; + +def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510", + "Cortex-A510 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler + ]>; + +def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", + "Cortex-A57 ARM processors", [ + FeatureFuseAES, + FeatureBalanceFPOps, + FeatureCustomCheapAsMoveHandling, + FeatureFuseAdrpAdd, + FeatureFuseLiterals, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive]>; + +def TuneA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65", + "Cortex-A65 ARM processors", [ + FeatureFuseAES, + FeatureFuseAddress, + FeatureFuseAdrpAdd, + FeatureFuseLiterals]>; + +def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", + "Cortex-A72 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureFuseLiterals]>; + +def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", + "Cortex-A73 ARM processors", [ + FeatureFuseAES]>; + +def TuneA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", + "Cortex-A75 ARM processors", [ + FeatureFuseAES]>; + +def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", + "Cortex-A76 ARM processors", [ + FeatureFuseAES]>; + +def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", + "Cortex-A77 ARM processors", [ + FeatureCmpBccFusion, + FeatureFuseAES]>; + +def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78", + "Cortex-A78 ARM processors", [ + FeatureCmpBccFusion, + FeatureFuseAES, + FeaturePostRAScheduler]>; + +def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily", + "CortexA78C", + "Cortex-A78C ARM processors", [ + FeatureCmpBccFusion, + FeatureFuseAES, + FeaturePostRAScheduler]>; + +def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710", + "Cortex-A710 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureCmpBccFusion]>; + +def TuneR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily", + "CortexR82", + 
"Cortex-R82 ARM processors", [ + FeaturePostRAScheduler]>; + +def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", + "Cortex-X1 ARM processors", [ + FeatureCmpBccFusion, + FeatureFuseAES, + FeaturePostRAScheduler]>; + +def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2", + "Cortex-X2 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureCmpBccFusion]>; + +def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX", + "Fujitsu A64FX processors", [ + FeaturePostRAScheduler, + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeaturePredictableSelectIsExpensive + ]>; + +def TuneCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel", + "Nvidia Carmel processors">; + +// Note that cyclone does not fuse AES instructions, but newer apple chips do +// perform the fusion and cyclone is used by default when targetting apple OSes. +def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7", + "Apple A7 (the CPU formerly known as Cyclone)", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAES, FeatureFuseCryptoEOR, + FeatureZCRegMove, + FeatureZCZeroing, + FeatureZCZeroingFPWorkaround] + >; + +def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10", + "Apple A10", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureZCRegMove, + FeatureZCZeroing] + >; + +def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11", + "Apple A11", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureZCRegMove, + FeatureZCZeroing] + >; + +def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12", + "Apple A12", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureZCRegMove, + FeatureZCZeroing] + >; + +def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13", + "Apple A13", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAES, + FeatureFuseCryptoEOR, + FeatureZCRegMove, + FeatureZCZeroing] + >; + +def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14", + "Apple A14", [ + FeatureAggressiveFMA, + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseArithmeticLogic, + FeatureFuseCCSelect, + FeatureFuseCryptoEOR, + FeatureFuseAdrpAdd, + FeatureFuseLiterals, + FeatureZCRegMove, + FeatureZCZeroing]>; + +def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", + "Samsung Exynos-M3 processors", + [FeatureExynosCheapAsMoveHandling, + FeatureForce32BitJumpTables, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseCCSelect, + FeatureFuseAdrpAdd, + FeatureFuseLiterals, + FeatureLSLFast, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive]>; + +// Re-uses some scheduling and tunings from the ExynosM3 proc family. 
+def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3", + "Samsung Exynos-M4 processors", + [FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureExynosCheapAsMoveHandling, + FeatureForce32BitJumpTables, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseArithmeticLogic, + FeatureFuseCCSelect, + FeatureFuseAdrpAdd, + FeatureFuseLiterals, + FeatureLSLFast, + FeaturePostRAScheduler, + FeatureZCZeroing]>; + +def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", + "Qualcomm Kryo processors", [ + FeatureCustomCheapAsMoveHandling, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing, + FeatureLSLFast] + >; + +def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", + "Qualcomm Falkor processors", [ + FeatureCustomCheapAsMoveHandling, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing, + FeatureLSLFast, + FeatureSlowSTRQro + ]>; + +def TuneNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", "NeoverseE1", + "Neoverse E1 ARM processors", [ + FeaturePostRAScheduler, + FeatureFuseAES + ]>; + +def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1", + "Neoverse N1 ARM processors", [ + FeaturePostRAScheduler, + FeatureFuseAES + ]>; + +def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2", + "Neoverse N2 ARM processors", [ + FeaturePostRAScheduler, + FeatureFuseAES + ]>; +def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Neoverse512TVB", + "Neoverse 512-TVB ARM processors", [ + FeaturePostRAScheduler, + FeatureFuseAES + ]>; + +def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1", + "Neoverse V1 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler]>; + +def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", + "Qualcomm Saphira processors", [ + FeatureCustomCheapAsMoveHandling, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing, + FeatureLSLFast]>; + +def TuneThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", "ThunderX2T99", + "Cavium ThunderX2 processors", [ + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive]>; + +def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily", + "ThunderX3T110", + "Marvell ThunderX3 processors", [ + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureBalanceFPOps, + FeatureStrictAlign]>; + +def TuneThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX", + "Cavium ThunderX processors", [ + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive]>; + +def TuneThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily", + "ThunderXT88", + "Cavium ThunderX processors", [ + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive]>; + +def TuneThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily", + "ThunderXT81", + "Cavium ThunderX processors", [ + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive]>; + +def TuneThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily", + "ThunderXT83", + "Cavium ThunderX processors", [ + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive]>; + +def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110", + "HiSilicon TS-V110 processors", [ + FeatureCustomCheapAsMoveHandling, + FeatureFuseAES, + 
FeaturePostRAScheduler]>; + +def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1", + "Ampere Computing Ampere-1 processors", [ + FeaturePostRAScheduler, + FeatureFuseAES, + FeatureLSLFast, + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeatureCmpBccFusion, + FeatureFuseAddress, + FeatureFuseLiterals]>; + +def ProcessorFeatures { + list A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto, + FeatureFPARMv8, FeatureNEON, FeaturePerfMon]; + list A55 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureRCPC, FeaturePerfMon]; + list A510 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, + FeatureMatMulInt8, FeatureBF16, FeatureAM, + FeatureMTE, FeatureETE, FeatureSVE2BitPerm, + FeatureFP16FML]; + list A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureRCPC, FeatureSSBS, FeatureRAS, + FeaturePerfMon]; + list A76 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureRCPC, FeatureSSBS, FeaturePerfMon]; + list A77 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureRCPC, FeaturePerfMon, FeatureSSBS]; + list A78 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureRCPC, FeaturePerfMon, FeatureSPE, + FeatureSSBS]; + list A78C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureFullFP16, FeatureDotProd, + FeatureFlagM, FeatureFP16FML, FeaturePAuth, + FeaturePerfMon, FeatureRCPC, FeatureSPE, + FeatureSSBS]; + list A710 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, + FeatureETE, FeatureMTE, FeatureFP16FML, + FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8]; + list R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16, + FeatureFP16FML, FeatureSSBS, FeaturePredRes, + FeatureSB]; + list X1 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureRCPC, FeaturePerfMon, + FeatureSPE, FeatureFullFP16, FeatureDotProd, + FeatureSSBS]; + list X1C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureRCPC, FeaturePerfMon, + FeatureSPE, FeatureFullFP16, FeatureDotProd, + FeaturePAuth, FeatureSSBS]; + list X2 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, + FeatureMatMulInt8, FeatureBF16, FeatureAM, + FeatureMTE, FeatureETE, FeatureSVE2BitPerm, + FeatureFP16FML]; + list A64FX = [HasV8_2aOps, FeatureFPARMv8, FeatureNEON, + FeatureSHA2, FeaturePerfMon, FeatureFullFP16, + FeatureSVE, FeatureComplxNum]; + list Carmel = [HasV8_2aOps, FeatureNEON, FeatureCrypto, + FeatureFullFP16]; + list AppleA7 = [HasV8_0aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON,FeaturePerfMon, FeatureAppleA7SysReg]; + list AppleA10 = [HasV8_0aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureCRC, + FeatureRDM, FeaturePAN, FeatureLOR, FeatureVH]; + list AppleA11 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureFullFP16]; + list AppleA12 = [HasV8_3aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureFullFP16]; + list AppleA13 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureFullFP16, + FeatureFP16FML, FeatureSHA3]; + list AppleA14 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureFRInt3264, + FeatureSpecRestrict, FeatureSSBS, FeatureSB, + FeaturePredRes, FeatureCacheDeepPersist, + FeatureFullFP16, FeatureFP16FML, FeatureSHA3, + FeatureAltFPCmp]; + list ExynosM3 = [HasV8_0aOps, 
FeatureCRC, FeatureCrypto, + FeaturePerfMon]; + list ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, + FeatureFullFP16, FeaturePerfMon]; + list Falkor = [HasV8_0aOps, FeatureCRC, FeatureCrypto, + FeatureFPARMv8, FeatureNEON, FeaturePerfMon, + FeatureRDM]; + list NeoverseE1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, + FeatureFPARMv8, FeatureFullFP16, FeatureNEON, + FeatureRCPC, FeatureSSBS, FeaturePerfMon]; + list NeoverseN1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, + FeatureFPARMv8, FeatureFullFP16, FeatureNEON, + FeatureRCPC, FeatureSPE, FeatureSSBS, + FeaturePerfMon]; + list NeoverseN2 = [HasV8_5aOps, FeatureBF16, FeatureETE, + FeatureMatMulInt8, FeatureMTE, FeatureSVE2, + FeatureSVE2BitPerm, FeatureTRBE, FeatureCrypto, + FeaturePerfMon]; + list Neoverse512TVB = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist, + FeatureCrypto, FeatureFPARMv8, FeatureFP16FML, + FeatureFullFP16, FeatureMatMulInt8, FeatureNEON, + FeaturePerfMon, FeatureRandGen, FeatureSPE, + FeatureSSBS, FeatureSVE]; + list NeoverseV1 = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist, + FeatureCrypto, FeatureFPARMv8, FeatureFP16FML, + FeatureFullFP16, FeatureMatMulInt8, FeatureNEON, + FeaturePerfMon, FeatureRandGen, FeatureSPE, + FeatureSSBS, FeatureSVE]; + list Saphira = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureSPE, FeaturePerfMon]; + list ThunderX = [HasV8_0aOps, FeatureCRC, FeatureCrypto, + FeatureFPARMv8, FeaturePerfMon, FeatureNEON]; + list ThunderX2T99 = [HasV8_1aOps, FeatureCRC, FeatureCrypto, + FeatureFPARMv8, FeatureNEON, FeatureLSE]; + list ThunderX3T110 = [HasV8_3aOps, FeatureCRC, FeatureCrypto, + FeatureFPARMv8, FeatureNEON, FeatureLSE, + FeaturePAuth, FeaturePerfMon]; + list TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureSPE, + FeatureFullFP16, FeatureFP16FML, FeatureDotProd]; + list Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon, + FeatureMTE, FeatureSSBS]; + + // ETE and TRBE are future architecture extensions. We temporarily enable them + // by default for users targeting generic AArch64. The extensions do not + // affect code generated by the compiler and can be used only by explicitly + // mentioning the new system register names in assembly. + list Generic = [FeatureFPARMv8, FeatureNEON, FeatureETE]; +} + +// FeatureFuseAdrpAdd is enabled under Generic to allow linker merging +// optimizations. 
+def : ProcessorModel<"generic", CortexA55Model, ProcessorFeatures.Generic, + [FeatureFuseAES, FeatureFuseAdrpAdd, FeaturePostRAScheduler]>; +def : ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53, + [TuneA35]>; +def : ProcessorModel<"cortex-a34", CortexA53Model, ProcessorFeatures.A53, + [TuneA35]>; +def : ProcessorModel<"cortex-a53", CortexA53Model, ProcessorFeatures.A53, + [TuneA53]>; +def : ProcessorModel<"cortex-a55", CortexA55Model, ProcessorFeatures.A55, + [TuneA55]>; +def : ProcessorModel<"cortex-a510", CortexA55Model, ProcessorFeatures.A510, + [TuneA510]>; +def : ProcessorModel<"cortex-a57", CortexA57Model, ProcessorFeatures.A53, + [TuneA57]>; +def : ProcessorModel<"cortex-a65", CortexA53Model, ProcessorFeatures.A65, + [TuneA65]>; +def : ProcessorModel<"cortex-a65ae", CortexA53Model, ProcessorFeatures.A65, + [TuneA65]>; +def : ProcessorModel<"cortex-a72", CortexA57Model, ProcessorFeatures.A53, + [TuneA72]>; +def : ProcessorModel<"cortex-a73", CortexA57Model, ProcessorFeatures.A53, + [TuneA73]>; +def : ProcessorModel<"cortex-a75", CortexA57Model, ProcessorFeatures.A55, + [TuneA75]>; +def : ProcessorModel<"cortex-a76", CortexA57Model, ProcessorFeatures.A76, + [TuneA76]>; +def : ProcessorModel<"cortex-a76ae", CortexA57Model, ProcessorFeatures.A76, + [TuneA76]>; +def : ProcessorModel<"cortex-a77", CortexA57Model, ProcessorFeatures.A77, + [TuneA77]>; +def : ProcessorModel<"cortex-a78", CortexA57Model, ProcessorFeatures.A78, + [TuneA78]>; +def : ProcessorModel<"cortex-a78c", CortexA57Model, ProcessorFeatures.A78C, + [TuneA78C]>; +def : ProcessorModel<"cortex-a710", NeoverseN2Model, ProcessorFeatures.A710, + [TuneA710]>; +def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82, + [TuneR82]>; +def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1, + [TuneX1]>; +def : ProcessorModel<"cortex-x1c", CortexA57Model, ProcessorFeatures.X1C, + [TuneX1]>; +def : ProcessorModel<"cortex-x2", NeoverseN2Model, ProcessorFeatures.X2, + [TuneX2]>; +def : ProcessorModel<"neoverse-e1", CortexA53Model, + ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>; +def : ProcessorModel<"neoverse-n1", CortexA57Model, + ProcessorFeatures.NeoverseN1, [TuneNeoverseN1]>; +def : ProcessorModel<"neoverse-n2", NeoverseN2Model, + ProcessorFeatures.NeoverseN2, [TuneNeoverseN2]>; +def : ProcessorModel<"neoverse-512tvb", NeoverseN2Model, + ProcessorFeatures.Neoverse512TVB, [TuneNeoverse512TVB]>; +def : ProcessorModel<"neoverse-v1", NeoverseN2Model, + ProcessorFeatures.NeoverseV1, [TuneNeoverseV1]>; +def : ProcessorModel<"exynos-m3", ExynosM3Model, ProcessorFeatures.ExynosM3, + [TuneExynosM3]>; +def : ProcessorModel<"exynos-m4", ExynosM4Model, ProcessorFeatures.ExynosM4, + [TuneExynosM4]>; +def : ProcessorModel<"exynos-m5", ExynosM5Model, ProcessorFeatures.ExynosM4, + [TuneExynosM4]>; +def : ProcessorModel<"falkor", FalkorModel, ProcessorFeatures.Falkor, + [TuneFalkor]>; +def : ProcessorModel<"saphira", FalkorModel, ProcessorFeatures.Saphira, + [TuneSaphira]>; +def : ProcessorModel<"kryo", KryoModel, ProcessorFeatures.A53, [TuneKryo]>; + +// Cavium ThunderX/ThunderX T8X Processors +def : ProcessorModel<"thunderx", ThunderXT8XModel, ProcessorFeatures.ThunderX, + [TuneThunderX]>; +def : ProcessorModel<"thunderxt88", ThunderXT8XModel, + ProcessorFeatures.ThunderX, [TuneThunderXT88]>; +def : ProcessorModel<"thunderxt81", ThunderXT8XModel, + ProcessorFeatures.ThunderX, [TuneThunderXT81]>; +def : ProcessorModel<"thunderxt83", ThunderXT8XModel, + ProcessorFeatures.ThunderX, 
[TuneThunderXT83]>; +// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan. +def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, + ProcessorFeatures.ThunderX2T99, [TuneThunderX2T99]>; +// Marvell ThunderX3T110 Processors. +def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, + ProcessorFeatures.ThunderX3T110, [TuneThunderX3T110]>; +def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110, + [TuneTSV110]>; + +// Support cyclone as an alias for apple-a7 so we can still LTO old bitcode. +def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7, + [TuneAppleA7]>; + +// iPhone and iPad CPUs +def : ProcessorModel<"apple-a7", CycloneModel, ProcessorFeatures.AppleA7, + [TuneAppleA7]>; +def : ProcessorModel<"apple-a8", CycloneModel, ProcessorFeatures.AppleA7, + [TuneAppleA7]>; +def : ProcessorModel<"apple-a9", CycloneModel, ProcessorFeatures.AppleA7, + [TuneAppleA7]>; +def : ProcessorModel<"apple-a10", CycloneModel, ProcessorFeatures.AppleA10, + [TuneAppleA10]>; +def : ProcessorModel<"apple-a11", CycloneModel, ProcessorFeatures.AppleA11, + [TuneAppleA11]>; +def : ProcessorModel<"apple-a12", CycloneModel, ProcessorFeatures.AppleA12, + [TuneAppleA12]>; +def : ProcessorModel<"apple-a13", CycloneModel, ProcessorFeatures.AppleA13, + [TuneAppleA13]>; +def : ProcessorModel<"apple-a14", CycloneModel, ProcessorFeatures.AppleA14, + [TuneAppleA14]>; + +// Mac CPUs +def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14, + [TuneAppleA14]>; + +// watch CPUs. +def : ProcessorModel<"apple-s4", CycloneModel, ProcessorFeatures.AppleA12, + [TuneAppleA12]>; +def : ProcessorModel<"apple-s5", CycloneModel, ProcessorFeatures.AppleA12, + [TuneAppleA12]>; + +// Alias for the latest Apple processor model supported by LLVM. +def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA14, + [TuneAppleA14]>; + +// Fujitsu A64FX +def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX, + [TuneA64FX]>; + +// Nvidia Carmel +def : ProcessorModel<"carmel", NoSchedModel, ProcessorFeatures.Carmel, + [TuneCarmel]>; + +// Ampere Computing +def : ProcessorModel<"ampere1", Ampere1Model, ProcessorFeatures.Ampere1, + [TuneAmpere1]>; + +//===----------------------------------------------------------------------===// +// Assembly parser +//===----------------------------------------------------------------------===// + +def GenericAsmParserVariant : AsmParserVariant { + int Variant = 0; + string Name = "generic"; + string BreakCharacters = "."; + string TokenizingCharacters = "[]*!/"; +} + +def AppleAsmParserVariant : AsmParserVariant { + int Variant = 1; + string Name = "apple-neon"; + string BreakCharacters = "."; + string TokenizingCharacters = "[]*!/"; +} + +//===----------------------------------------------------------------------===// +// Assembly printer +//===----------------------------------------------------------------------===// +// AArch64 Uses the MC printer for asm output, so make sure the TableGen +// AsmWriter bits get associated with the correct class. 
+def GenericAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; + bit isMCAsmWriter = 1; +} + +def AppleAsmWriter : AsmWriter { + let AsmWriterClassName = "AppleInstPrinter"; + int PassSubtarget = 1; + int Variant = 1; + int isMCAsmWriter = 1; +} + +//===----------------------------------------------------------------------===// +// Target Declaration +//===----------------------------------------------------------------------===// + +def AArch64 : Target { + let InstructionSet = AArch64InstrInfo; + let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant]; + let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter]; + let AllowRegisterRenaming = 1; +} + +//===----------------------------------------------------------------------===// +// Pfm Counters +//===----------------------------------------------------------------------===// + +include "AArch64PfmCounters.td" diff --git a/llvm/lib/Target/AArch64/AArch64ArkGcCallingConvention.td b/llvm/lib/Target/AArch64/AArch64ArkGcCallingConvention.td new file mode 100644 index 0000000000000000000000000000000000000000..5190ea36c3b68c48dd5ffda9270ebe0874145c52 --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64ArkGcCallingConvention.td @@ -0,0 +1,591 @@ +//=- AArch64ArkGcCallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for AArch64 architecture. +// +//===----------------------------------------------------------------------===// + +/// CCIfBigEndian - Match only if we're in big endian mode. +class CCIfBigEndian : + CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>; + +class CCIfILP32 : + CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>; + + +//===----------------------------------------------------------------------===// +// ARM AAPCS64 Calling Convention +//===----------------------------------------------------------------------===// + +let Entry = 1 in +def CC_AArch64_AAPCS : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32], CCBitConvertToType>, + + // Big endian vectors must be passed as if they were 1-element vectors so that + // their lanes are in a consistent order. + CCIfBigEndian>>, + CCIfBigEndian>>, + + // In AAPCS, an SRet is passed in X8, not X0 like a normal pointer parameter. + // However, on windows, in some circumstances, the SRet is passed in X0 or X1 + // instead. The presence of the inreg attribute indicates that SRet is + // passed in the alternative register (X0 or X1), not X8: + // - X0 for non-instance methods. + // - X1 for instance methods. + + // The "sret" attribute identifies indirect returns. + // The "inreg" attribute identifies non-aggregate types. + // The position of the "sret" attribute identifies instance/non-instance + // methods. + // "sret" on argument 0 means non-instance methods. + // "sret" on argument 1 means instance methods. + + CCIfInReg>>>>, + + CCIfSRet>>, + + // Put ByVal arguments directly on the stack. Minimum size and alignment of a + // slot is 64-bit. + CCIfByVal>, + + // The 'nest' parameter, if any, is passed in X18. 
+ // Darwin uses X18 as the platform register and hence 'nest' isn't currently + // supported there. + CCIfNest>, + + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf>>, + + // A SwiftError is passed in X21. + CCIfSwiftError>>, + + // Pass SwiftAsync in an otherwise callee saved register so that it will be + // preserved for normal function calls. + CCIfSwiftAsync>>, + + CCIfConsecutiveRegs>, + + CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, + nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64], + CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>, + CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, + nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64], + CCPassIndirect>, + + CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCAssignToReg<[P0, P1, P2, P3]>>, + CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCPassIndirect>, + + // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, + // up to eight each of GPR and FPR. + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>, + // i128 is split to two i64s, we can't fit half to register X7. + CCIfType<[i64], CCIfSplit>>, + + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfType<[i64], CCIfSplit>>, + + CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, + CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, + CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], + CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + // If more than will fit in registers, pass them on the stack instead. + CCIfType<[i1, i8, i16, f16, bf16], CCAssignToStack<8, 8>>, + CCIfType<[i32, f32], CCAssignToStack<8, 8>>, + CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16, v4bf16], + CCAssignToStack<8, 8>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToStack<16, 16>> +]>; + +let Entry = 1 in +def RetCC_AArch64_AAPCS : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32], CCBitConvertToType>, + + CCIfConsecutiveRegs>, + CCIfSwiftError>>, + + // Big endian vectors must be passed as if they were 1-element vectors so that + // their lanes are in a consistent order. 
+ CCIfBigEndian>>, + CCIfBigEndian>>, + + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, + CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, + CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], + CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, + nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64], + CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>, + + CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCAssignToReg<[P0, P1, P2, P3]>> +]>; + +// Vararg functions on windows pass floats in integer registers +let Entry = 1 in +def CC_AArch64_Win64_VarArg : CallingConv<[ + CCIfType<[f16, bf16], CCBitConvertToType>, + CCIfType<[f32], CCBitConvertToType>, + CCIfType<[f64], CCBitConvertToType>, + CCDelegateTo +]>; + +// Windows Control Flow Guard checks take a single argument (the target function +// address) and have no return value. +let Entry = 1 in +def CC_AArch64_Win64_CFGuard_Check : CallingConv<[ + CCIfType<[i64], CCAssignToReg<[X15]>> +]>; + + +// Darwin uses a calling convention which differs in only two ways +// from the standard one at this level: +// + i128s (i.e. split i64s) don't need even registers. +// + Stack slots are sized as needed rather than being at least 64-bit. +let Entry = 1 in +def CC_AArch64_DarwinPCS : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, + + // An SRet is passed in X8, not X0 like a normal pointer parameter. + CCIfSRet>>, + + // Put ByVal arguments directly on the stack. Minimum size and alignment of a + // slot is 64-bit. + CCIfByVal>, + + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf>>, + + // A SwiftError is passed in X21. + CCIfSwiftError>>, + + // Pass SwiftAsync in an otherwise callee saved register so that it will be + // preserved for normal function calls. + CCIfSwiftAsync>>, + + CCIfConsecutiveRegs>, + + // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, + // up to eight each of GPR and FPR. + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>, + // i128 is split to two i64s, we can't fit half to register X7. + CCIfType<[i64], + CCIfSplit>>, + // i128 is split to two i64s, and its stack alignment is 16 bytes. 
+ CCIfType<[i64], CCIfSplit>>, + + CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, + CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, + CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], + CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + // If more than will fit in registers, pass them on the stack instead. + CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>, + CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16 || ValVT == MVT::bf16", + CCAssignToStack<2, 2>>, + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + + // Re-demote pointers to 32-bits so we don't end up storing 64-bit + // values and clobbering neighbouring stack locations. Not very pretty. + CCIfPtr>>, + CCIfPtr>>, + + CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16, v4bf16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToStack<16, 16>> +]>; + +let Entry = 1 in +def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, + + CCIfConsecutiveRegs>, + + // Handle all scalar types as either i64 or f64. + CCIfType<[i8, i16, i32], CCPromoteToType>, + CCIfType<[f16, bf16, f32], CCPromoteToType>, + + // Everything is on the stack. + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfType<[i64], CCIfSplit>>, + CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToStack<16, 16>> +]>; + +// In the ILP32 world, the minimum stack slot size is 4 bytes. Otherwise the +// same as the normal Darwin VarArgs handling. +let Entry = 1 in +def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[ + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, + + // Handle all scalar types as either i32 or f32. + CCIfType<[i8, i16], CCPromoteToType>, + CCIfType<[f16, bf16], CCPromoteToType>, + + // Everything is on the stack. + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfPtr>>, + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64], CCIfSplit>>, + CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16], + CCAssignToStack<16, 16>> +]>; + + +// The WebKit_JS calling convention only passes the first argument (the callee) +// in register and the remaining arguments on stack. We allow 32bit stack slots, +// so that WebKit can write partial values in the stack and define the other +// 32bit quantity as undef. +let Entry = 1 in +def CC_AArch64_WebKit_JS : CallingConv<[ + // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToReg<[W0]>>, + CCIfType<[i64], CCAssignToReg<[X0]>>, + + // Pass the remaining arguments on the stack instead. 
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64, f64], CCAssignToStack<8, 8>> +]>; + +let Entry = 1 in +def RetCC_AArch64_WebKit_JS : CallingConv<[ + CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>> +]>; + +// OHOS_LOCAL begin +// Ark Conventions +let Entry = 1 in +def CC_AArch64_ArkInt : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W20, W21, W22, W23, W24, W25, W26, W28], [X20, X21, X22, X23, X24, X25, X26, X28]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X20, X21, X22, X23, X24, X25, X26, X28], [W20, W21, W22, W23, W24, W25, W26, W28]>>, +]>; + +let Entry = 1 in +def CC_AArch64_ArkFast0 : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W28, W29], [X28, FP]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X28, FP], [W28, W29]>>, +]>; + +let Entry = 1 in +def CC_AArch64_ArkFast1 : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W28, W29], [X0, X28, FP]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X28, FP], [W0, W28, W29]>>, +]>; + +let Entry = 1 in +def CC_AArch64_ArkFast2 : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W28, W29], [X0, X1, X28, FP]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X28, FP], [W0, W1, W28, W29]>>, +]>; + +let Entry = 1 in +def CC_AArch64_ArkFast3 : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W28, W29], [X0, X1, X2, X28, FP]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X28, FP], [W0, W1, W2, W28, W29]>>, +]>; + +let Entry = 1 in +def CC_AArch64_ArkFast4 : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W28, W29], [X0, X1, X2, X3, X28, FP]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X28, FP], [W0, W1, W2, W3, W28, W29]>>, +]>; + +let Entry = 1 in +def CC_AArch64_ArkFast5 : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W28, W29], [X0, X1, X2, X3, X4, X28, FP]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X28, FP], [W0, W1, W2, W3, W4, W28, W29]>>, +]>; + +let Entry = 1 in +def CC_AArch64_ArkResolver : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W16], [X16]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X16], [W16]>>, +]>; + +let Entry = 1 in +def RetCC_AArch64_ArkResolver : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W16], [X16]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X16], [W16]>>, +]>; +// OHOS_LOCAL end + +//===----------------------------------------------------------------------===// +// ARM64 Calling Convention for GHC +//===----------------------------------------------------------------------===// + +// This calling convention is specific to the Glasgow Haskell Compiler. 
+// The only documentation is the GHC source code, specifically the C header +// file: +// +// https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h +// +// which defines the registers for the Spineless Tagless G-Machine (STG) that +// GHC uses to implement lazy evaluation. The generic STG machine has a set of +// registers which are mapped to appropriate set of architecture specific +// registers for each CPU architecture. +// +// The STG Machine is documented here: +// +// https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode +// +// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI +// register mapping". + +let Entry = 1 in +def CC_AArch64_GHC : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType>, + + CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, + CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>, + CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>, + + // Promote i8/i16/i32 arguments to i64. + CCIfType<[i8, i16, i32], CCPromoteToType>, + + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim + CCIfType<[i64], CCAssignToReg<[X19, FP, X20, X21, X22, X23, X24, X25, X26, X27, X28]>> +]>; + +// The order of the callee-saves in this file is important, because the +// FrameLowering code will use this order to determine the layout the +// callee-save area in the stack frame. As can be observed below, Darwin +// requires the frame-record (LR, FP) to be at the top the callee-save area, +// whereas for other platforms they are at the bottom. + +// FIXME: LR is only callee-saved in the sense that *we* preserve it and are +// presumably a callee to someone. External functions may not do so, but this +// is currently safe since BL has LR as an implicit-def and what happens after a +// tail call doesn't matter. +// +// It would be better to model its preservation semantics properly (create a +// vreg on entry, use it in RET & tail call generation; make that vreg def if we +// end up saving LR as part of a call frame). Watch this space... +def CSR_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, + D8, D9, D10, D11, + D12, D13, D14, D15, + LR, FP)>; + +// A variant for treating X18 as callee saved, when interfacing with +// code that needs X18 to be preserved. +def CSR_AArch64_AAPCS_X18 : CalleeSavedRegs<(add X18, CSR_AArch64_AAPCS)>; + +// Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x. +// We put FP before LR, so that frame lowering logic generates (FP,LR) pairs, +// and not (LR,FP) pairs. +def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, FP, LR, + D8, D9, D10, D11, + D12, D13, D14, D15)>; + +// The Control Flow Guard check call uses a custom calling convention that also +// preserves X0-X8 and Q0-Q7. 
+def CSR_Win_AArch64_CFGuard_Check : CalleeSavedRegs<(add CSR_Win_AArch64_AAPCS, + (sequence "X%u", 0, 8), + (sequence "Q%u", 0, 7))>; + +// AArch64 PCS for vector functions (VPCS) +// must (additionally) preserve full Q8-Q23 registers +def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP, + (sequence "Q%u", 8, 23))>; + +// Functions taking SVE arguments or returning an SVE type +// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15 +def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add (sequence "Z%u", 8, 23), + (sequence "P%u", 4, 15), + X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP)>; + +def CSR_AArch64_AAPCS_SwiftTail + : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X20, X22)>; + +// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since +// 'this' and the pointer return value are both passed in X0 in these cases, +// this can be partially modelled by treating X0 as a callee-saved register; +// only the resulting RegMask is used; the SaveList is ignored +// +// (For generic ARM 64-bit ABI code, clang will not generate constructors or +// destructors with 'this' returns, so this RegMask will not be used in that +// case) +def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>; + +def CSR_AArch64_AAPCS_SwiftError + : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>; + +// The ELF stub used for TLS-descriptor access saves every feasible +// register. Only X0 and LR are clobbered. +def CSR_AArch64_TLS_ELF + : CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP, + (sequence "Q%u", 0, 31))>; + +def CSR_AArch64_AllRegs + : CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP, + (sequence "X%u", 0, 28), FP, LR, SP, + (sequence "B%u", 0, 31), (sequence "H%u", 0, 31), + (sequence "S%u", 0, 31), (sequence "D%u", 0, 31), + (sequence "Q%u", 0, 31))>; + +def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>; + +def CSR_AArch64_RT_MostRegs : CalleeSavedRegs<(add CSR_AArch64_AAPCS, + (sequence "X%u", 9, 15))>; + +def CSR_AArch64_StackProbe_Windows + : CalleeSavedRegs<(add (sequence "X%u", 0, 15), + (sequence "X%u", 18, 28), FP, SP, + (sequence "Q%u", 0, 31))>; + +// OHOS_LOCAL begin +def CSR_AArch64_ArkInt : CalleeSavedRegs<(add FP)>; + +def CSR_AArch64_ArkFast5 + : CalleeSavedRegs<(add (sub (sequence "X%u", 5, 27), X16, X17), LR, + (sequence "D%u", 0, 31))>; +def CSR_AArch64_ArkFast4 + : CalleeSavedRegs<(add CSR_AArch64_ArkFast5, X4)>; + +def CSR_AArch64_ArkFast3 + : CalleeSavedRegs<(add CSR_AArch64_ArkFast4, X3)>; + +def CSR_AArch64_ArkFast2 + : CalleeSavedRegs<(add CSR_AArch64_ArkFast3, X2)>; + +def CSR_AArch64_ArkFast1 + : CalleeSavedRegs<(add CSR_AArch64_ArkFast2, X1)>; + +def CSR_AArch64_ArkFast0 + : CalleeSavedRegs<(add CSR_AArch64_ArkFast1, X0)>; + +def CSR_AArch64_ArkMethod : CalleeSavedRegs<(add LR, FP)>; +// OHOS_LOCAL end + +// Darwin variants of AAPCS. +// Darwin puts the frame-record at the top of the callee-save area. 
+def CSR_Darwin_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, + X23, X24, X25, X26, X27, X28, + D8, D9, D10, D11, + D12, D13, D14, D15)>; + +def CSR_Darwin_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, + X22, X23, X24, X25, X26, X27, + X28, (sequence "Q%u", 8, 23))>; +def CSR_Darwin_AArch64_AAPCS_ThisReturn + : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, X0)>; + +def CSR_Darwin_AArch64_AAPCS_SwiftError + : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>; + +def CSR_Darwin_AArch64_AAPCS_SwiftTail + : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X20, X22)>; + +// The function used by Darwin to obtain the address of a thread-local variable +// guarantees more than a normal AAPCS function. x16 and x17 are used on the +// fast path for calculation, but other registers except X0 (argument/return) +// and LR (it is a call, after all) are preserved. +def CSR_Darwin_AArch64_TLS + : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17), + FP, + (sequence "Q%u", 0, 31))>; + +// We can only handle a register pair with adjacent registers, the register pair +// should belong to the same class as well. Since the access function on the +// fast path calls a function that follows CSR_Darwin_AArch64_TLS, +// CSR_Darwin_AArch64_CXX_TLS should be a subset of CSR_Darwin_AArch64_TLS. +def CSR_Darwin_AArch64_CXX_TLS + : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, + (sub (sequence "X%u", 1, 28), X9, X15, X16, X17, X18, X19), + (sequence "D%u", 0, 31))>; + +// CSRs that are handled by prologue, epilogue. +def CSR_Darwin_AArch64_CXX_TLS_PE + : CalleeSavedRegs<(add LR, FP)>; + +// CSRs that are handled explicitly via copies. +def CSR_Darwin_AArch64_CXX_TLS_ViaCopy + : CalleeSavedRegs<(sub CSR_Darwin_AArch64_CXX_TLS, LR, FP)>; + +def CSR_Darwin_AArch64_RT_MostRegs + : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, (sequence "X%u", 9, 15))>; + +// Variants of the standard calling conventions for shadow call stack. +// These all preserve x18 in addition to any other registers. 
+def CSR_AArch64_NoRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_NoRegs, X18)>; +def CSR_AArch64_AllRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_AllRegs, X18)>; +def CSR_AArch64_AAPCS_SwiftError_SCS + : CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>; +def CSR_AArch64_RT_MostRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>; +def CSR_AArch64_AAVPCS_SCS + : CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>; +def CSR_AArch64_SVE_AAPCS_SCS + : CalleeSavedRegs<(add CSR_AArch64_SVE_AAPCS, X18)>; +def CSR_AArch64_AAPCS_SCS + : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index fa4bbadd0995fcc0dfe276d4095ee82bd8385c92..8856fe0ea5554f1bd17287b66a852da6b0b726fd 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1403,6 +1403,18 @@ static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII, } } +#ifdef ARK_GC_SUPPORT +Triple::ArchType AArch64FrameLowering::GetArkSupportTarget() const +{ + return Triple::aarch64; +} + +int AArch64FrameLowering::GetFixedFpPosition() const +{ + return -1; +} +#endif + void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -1501,8 +1513,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. + #ifndef ARK_GC_SUPPORT if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; + #endif + // asm-int GHC call webkit function, we need push regs to stack. // OHOS_LOCAL begin if (HasFP && (MF.getFunction().getCallingConv() == CallingConv::ArkFast0 || @@ -1970,8 +1985,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. + #ifndef ARK_GC_SUPPORT if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; + #endif + // asm-int GHC call webkit function, we need push regs to stack. // How much of the stack used by incoming arguments this function is expected // to restore in this particular epilogue. @@ -2995,8 +3013,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, RegScavenger *RS) const { // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. + #ifndef ARK_GC_SUPPORT if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; + #endif + // asm-int GHC call webkit function, we need push regs to stack. TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); const AArch64RegisterInfo *RegInfo = static_cast( diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 8a3dbefb124ed88eedea584b345f32cb48d5bd41..221f92856c5573367ba6d3abc4a2a5b667e2c9d9 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -16,6 +16,9 @@ #include "AArch64StackProtectorRetLowering.h" // OHOS_LOCAL #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Support/TypeSize.h" +#ifdef ARK_GC_SUPPORT +#include "llvm/ADT/Triple.h" +#endif namespace llvm { @@ -42,6 +45,10 @@ public: /// the function. 
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; +#ifdef ARK_GC_SUPPORT + Triple::ArchType GetArkSupportTarget() const override; + int GetFixedFpPosition() const override; +#endif const StackProtectorRetLowering *getStackProtectorRet() const override; // OHOS_LOCAL diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7c5b5251aed33e833d999ed0053cf90fb74733aa..283da2a3dcf5ec81b5a0ab92942030ed8edc4ac3 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6239,8 +6239,13 @@ SDValue AArch64TargetLowering::LowerCallResult( /// Return true if the calling convention is one that we can guarantee TCO for. static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { +#ifdef ARK_GC_SUPPORT + return ((CC == CallingConv::GHC || CC == CallingConv::Fast) && GuaranteeTailCalls) || + CC == CallingConv::Tail || CC == CallingConv::SwiftTail; +#else return (CC == CallingConv::Fast && GuaranteeTailCalls) || CC == CallingConv::Tail || CC == CallingConv::SwiftTail; +#endif } /// Return true if we might ever do TCO for calls with this calling convention. @@ -6491,8 +6496,13 @@ SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const { +#ifdef ARK_GC_SUPPORT + return ((CallCC == CallingConv::GHC || CallCC == CallingConv::Fast) && TailCallOpt) || + CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail; +#else return (CallCC == CallingConv::Fast && TailCallOpt) || CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail; +#endif } // Check if the value is zero-extended from i1 to i8 diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 95294679f3eed1a5a4a3a03af67eacd1b7c1565b..49c45cec1e911469029559c5a30c6f656d14a7b5 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -354,6 +354,17 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (TFI->hasFP(MF) || TT.isOSDarwin()) markSuperRegs(Reserved, AArch64::W29); +#ifdef ARK_GC_SUPPORT + if (MF.getFunction().getCallingConv() == CallingConv::GHC) { + markSuperRegs(Reserved, AArch64::W29); + markSuperRegs(Reserved, AArch64::W30); + } + if ((MF.getFunction().getCallingConv() == CallingConv::WebKit_JS) || + (MF.getFunction().getCallingConv() == CallingConv::C)) { + markSuperRegs(Reserved, AArch64::W30); + } +#endif + for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) { if (MF.getSubtarget().isXRegisterReserved(i)) markSuperRegs(Reserved, AArch64::GPR32commonRegClass.getRegister(i)); diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt index ecf8be3c8503dc7f9e716994ed980cbd5ac7d41e..e3d4d5494fa90b28971936de2b954be811c19cd4 100644 --- a/llvm/lib/Target/AArch64/CMakeLists.txt +++ b/llvm/lib/Target/AArch64/CMakeLists.txt @@ -1,7 +1,10 @@ add_llvm_component_group(AArch64 HAS_JIT) -set(LLVM_TARGET_DEFINITIONS AArch64.td) - +if(BUILD_ARK_GC_SUPPORT) + set(LLVM_TARGET_DEFINITIONS AArch64ArkGc.td) +else() + set(LLVM_TARGET_DEFINITIONS AArch64.td) +endif(BUILD_ARK_GC_SUPPORT) tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer) tablegen(LLVM 
AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 48277a8504115d0c5d02c27c095d8db18fb2453f..62bf5f439fe6f8059e23bd9193aff7d662255d9f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -341,8 +341,13 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
 } // namespace
 
 static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
+#ifdef ARK_GC_SUPPORT
+  return ((CallConv == CallingConv::GHC || CallConv == CallingConv::Fast) && TailCallOpt) ||
+         CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
+#else
   return (CallConv == CallingConv::Fast && TailCallOpt) ||
          CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
+#endif
 }
 
 bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
@@ -662,8 +667,13 @@ bool AArch64CallLowering::lowerFormalArguments(
 
 /// Return true if the calling convention is one that we can guarantee TCO for.
 static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
+#ifdef ARK_GC_SUPPORT
+  return ((CC == CallingConv::GHC || CC == CallingConv::Fast) && GuaranteeTailCalls) ||
+         CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
+#else
   return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
          CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
+#endif
 }
 
 /// Return true if we might ever do TCO for calls with this calling convention.
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 58b60c19448721332468da138073efc6281b3718..0267b00aefba2dc5226447c1d77cc3b6d1179c31 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -1481,6 +1481,37 @@ bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
    - for 32-bit code, substitute %e?? registers for %r??
 */
 
+#ifdef ARK_GC_SUPPORT
+Triple::ArchType X86FrameLowering::GetArkSupportTarget() const
+{
+  return Is64Bit ? Triple::x86_64 : Triple::x86;
+}
+
+int X86FrameLowering::GetFixedFpPosition() const
+{
+  return 2;
+}
+
+int X86FrameLowering::GetFrameReserveSize(MachineFunction &MF) const
+{
+  int slotSize = sizeof(uint64_t);
+  if (!Is64Bit) {
+    slotSize = sizeof(uint32_t);
+  }
+  int reserveSize = 0;
+  MF.getFunction()
+      .getFnAttribute("frame-reserved-slots")
+      .getValueAsString()
+      .getAsInteger(10, reserveSize);
+
+  // On x86-64 the reserved size must be 16-byte aligned.
+  if (Is64Bit) {
+    return RoundUp(reserveSize, 2 * sizeof(uint64_t));
+  }
+  return reserveSize;
+}
+#endif
+
 void X86FrameLowering::emitPrologue(MachineFunction &MF,
                                     MachineBasicBlock &MBB) const {
   assert(&STI == &MF.getSubtarget() &&
@@ -1760,6 +1791,20 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
     else
       MFI.setOffsetAdjustment(-StackSize);
   }
+#ifdef ARK_GC_SUPPORT
+  // Reserve the stack space requested via the "frame-reserved-slots" attribute.
+  if (MF.getFunction().hasFnAttribute("frame-reserved-slots"))
+  {
+    unsigned StackPtr = TRI->getStackRegister();
+    int reserveSize = GetFrameReserveSize(MF);
+    const unsigned SUBOpc =
+        getSUBriOpcode(Uses64BitFramePtr, reserveSize);
+    BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr)
+        .addReg(StackPtr)
+        .addImm(reserveSize)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+#endif
 
   // For EH funclets, only allocate enough space for outgoing calls. Save the
   // NumBytes value that we would've used for the parent frame.
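For reference, the reserve-size computation used by the X86 hunks above reduces to a few lines of arithmetic: the "frame-reserved-slots" attribute carries a decimal byte count, and on x86-64 the prologue rounds it up to 16 bytes before subtracting it from the stack pointer. The following is a standalone sketch of that logic, not code from the patch; the helper names and sample values are illustrative only.

```cpp
// Standalone sketch (not patch code) of the reserve-size arithmetic.
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <string>

// Round x up to a multiple of n (n must be a power of two).
constexpr uint64_t roundUp(uint64_t x, uint64_t n) {
  return (x + n - 1) & ~(n - 1);
}

// Parse the decimal "frame-reserved-slots" value and, on 64-bit targets,
// round it up to 16 bytes as the patched X86 frame lowering does.
uint64_t frameReserveSize(const std::string &attrValue, bool is64Bit) {
  const uint64_t requested = std::strtoull(attrValue.c_str(), nullptr, 10);
  return is64Bit ? roundUp(requested, 2 * sizeof(uint64_t)) : requested;
}

int main() {
  std::cout << frameReserveSize("8", /*is64Bit=*/true) << "\n";   // 16
  std::cout << frameReserveSize("8", /*is64Bit=*/false) << "\n";  // 8
  std::cout << frameReserveSize("24", /*is64Bit=*/true) << "\n";  // 32
}
```

A frontend targeting this patch would presumably attach the attribute with Function::addFnAttr("frame-reserved-slots", "<bytes>") before code generation, which is what getFnAttribute(...).getValueAsString() reads back here.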
@@ -2226,6 +2271,22 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // AfterPop is the position to insert .cfi_restore. MachineBasicBlock::iterator AfterPop = MBBI; if (HasFP) { +#ifdef ARK_GC_SUPPORT + if (MF.getFunction().hasFnAttribute("frame-reserved-slots")) + { + + int reserveSize = GetFrameReserveSize(MF); + int slotSize = sizeof(uint32_t); + if (Is64Bit) { + slotSize = sizeof(uint64_t); + } + for (int i = 0; i < reserveSize / slotSize; i++) { + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), + MachineFramePtr) + .setMIFlag(MachineInstr::FrameDestroy); + } + } +#endif if (X86FI->hasSwiftAsyncContext()) { // Discard the context. int Offset = 16 + mergeSPUpdates(MBB, MBBI, true); @@ -2650,6 +2711,12 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( } } +#ifdef ARK_GC_SUPPORT + int reserveSize = GetFrameReserveSize(MF); + SpillSlotOffset -= reserveSize; // skip frame reserved + CalleeSavedFrameSize += reserveSize; +#endif + // Assign slots for GPRs. It increases frame size. for (CalleeSavedInfo &I : llvm::reverse(CSI)) { Register Reg = I.getReg(); diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h index e6a6241fdda039fa6506871328f8e9a293fae8ba..9783c7ca2d358cce6a48864fdde196ba5a38e776 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -74,6 +74,11 @@ public: /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. +#ifdef ARK_GC_SUPPORT + Triple::ArchType GetArkSupportTarget() const override; + int GetFixedFpPosition() const override; + int GetFrameReserveSize(MachineFunction &MF) const override; +#endif void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a75ee58ad7f75ebaa0df7aaae7b8c877a48bbb6f..c22915ce3fbf852854fa2b9ba253d0846f1f1ea3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4400,8 +4400,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, report_fatal_error("failed to perform tail call elimination on a call " "site marked musttail"); +#ifdef ARK_GC_SUPPORT + assert(!(isVarArg && canGuaranteeTCO(CallConv) && (CallConv != CallingConv::GHC)) && + "Var args not supported with calling convention fastcc, ghc or hipe"); +#else assert(!(isVarArg && canGuaranteeTCO(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"); +#endif // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 381901e74d9f7d4d5408970d7e4a79c0e37a299a..f6f92bddeff6f9cc50f0c3b7df6ed198cd8a8484 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -611,6 +611,12 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP)) Reserved.set(SubReg); } +#ifdef ARK_GC_SUPPORT + if (MF.getFunction().getCallingConv() == CallingConv::GHC) { + for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP)) + Reserved.set(SubReg); + } +#endif // Set the base-pointer register and its aliases as reserved if needed. if (hasBasePointer(MF)) {
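The epilogue side of the same mechanism (the POP loop added to emitEpilogue above) has to undo exactly what the prologue's SUB reserved. A quick standalone check, again illustrative rather than patch code: the epilogue emits reserve / slotSize POPs, so as long as the reserve is a multiple of the slot size (which the 16-byte rounding guarantees on x86-64), the stack pointer ends up fully restored.

```cpp
// Illustrative balance check for the reserved-slot prologue/epilogue pair.
#include <cstdint>
#include <iostream>

int main() {
  for (bool is64Bit : {true, false}) {
    const uint64_t slotSize = is64Bit ? 8 : 4;   // width of one POP
    const uint64_t requested = 8;                // sample "frame-reserved-slots" value
    // Reserve as the prologue computes it: rounded to 16 bytes on x86-64.
    const uint64_t reserve =
        is64Bit ? ((requested + 15) & ~uint64_t(15)) : requested;
    const uint64_t pops = reserve / slotSize;    // POPs emitted by the epilogue
    std::cout << (is64Bit ? "x86-64" : "x86") << ": SUB " << reserve
              << " bytes in the prologue, " << pops << " POPs ("
              << pops * slotSize << " bytes) in the epilogue\n";
  }
}
```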