From bbd1d385d472e82d838ff60fa04477319c6d51eb Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 20 Oct 2022 10:47:44 -0700 Subject: [PATCH 1/2] [AArch64] Fix scheduler crash in fusion code. Make sure we don't call getReg() on the first operand of instruction without knowing that operand is actually a register. (This codepath isn't enabled for most CPUs; only triggers on certain CPUs, like Cortex-X1.) Differential Revision: https://reviews.llvm.org/D136296 Signed-off-by: Roman Zhuykov --- .../lib/Target/AArch64/AArch64MacroFusion.cpp | 5 ++-- .../CodeGen/AArch64/misched-fusion-cmp.mir | 29 +++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/misched-fusion-cmp.mir diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp index c7657f37d16d..f51c27c62dfb 100644 --- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -30,8 +30,9 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI, // If we're in CmpOnly mode, we only fuse arithmetic instructions that // discard their result. - if (CmpOnly && !(FirstMI->getOperand(0).getReg() == AArch64::XZR || - FirstMI->getOperand(0).getReg() == AArch64::WZR)) { + if (CmpOnly && FirstMI->getOperand(0).isReg() && + !(FirstMI->getOperand(0).getReg() == AArch64::XZR || + FirstMI->getOperand(0).getReg() == AArch64::WZR)) { return false; } diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-cmp.mir b/llvm/test/CodeGen/AArch64/misched-fusion-cmp.mir new file mode 100644 index 000000000000..b0450c5b8c01 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/misched-fusion-cmp.mir @@ -0,0 +1,29 @@ +# RUN: llc -o /dev/null 2>&1 %s -mtriple aarch64-unknown -mcpu=cortex-x1 -run-pass=machine-scheduler +# Just ensure this doesn't crash. 
+ +--- +name: crash +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1(0x00000000), %bb.2(0x80000000) + liveins: $w0, $x1 + + %1:gpr64common = COPY $x1 + %0:gpr32common = COPY $w0 + %3:gpr64sp = COPY $xzr + INLINEASM &"", 9 /* sideeffect mayload attdialect */, 196622 /* mem:m */, %3 + %4:gpr32 = ADDSWri %0, 1, 0, implicit-def $nzcv + STRWui %4, %1, 0 :: (store (s32)) + Bcc 3, %bb.2, implicit killed $nzcv + B %bb.1 + + bb.1: + successors: + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + bb.2: + RET_ReallyLR +... -- Gitee From 680f691b02b7de8ea8b83ad7f9b6b7db8f151b3c Mon Sep 17 00:00:00 2001 From: Nechitaev Dmitry Date: Fri, 21 Apr 2023 13:44:03 +0300 Subject: [PATCH 2/2] [LLVMAOT] Implement ArkFrame and Ark spill slot placement Ark Frame is applied for methods with 'use-ark-frame' metadata, module should describe the frame in 'ark.frame.info' metadata. Ark stack slot placement applied for calls with 'use-ark-spills' attribute. 
Signed-off-by: Roman Zhuykov --- llvm/include/llvm/CodeGen/MachineFrameInfo.h | 42 +++++ llvm/include/llvm/CodeGen/MachineFunction.h | 8 + .../llvm/CodeGen/TargetFrameLowering.h | 13 ++ llvm/lib/CodeGen/MachineFunction.cpp | 46 +++++ llvm/lib/CodeGen/PrologEpilogInserter.cpp | 35 +++- .../SelectionDAG/StatepointLowering.cpp | 171 +++++++++++++++--- .../CodeGen/SelectionDAG/StatepointLowering.h | 24 ++- .../Target/AArch64/AArch64FrameLowering.cpp | 33 ++++ .../lib/Target/AArch64/AArch64FrameLowering.h | 8 + llvm/lib/Target/X86/X86FrameLowering.cpp | 20 ++ llvm/lib/Target/X86/X86FrameLowering.h | 8 + llvm/lib/Target/X86/X86RegisterInfo.cpp | 18 ++ llvm/test/CodeGen/AArch64/ark-spills.ll | 29 +++ llvm/test/CodeGen/X86/GC/ark-spills.ll | 27 +++ 14 files changed, 454 insertions(+), 28 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/ark-spills.ll create mode 100644 llvm/test/CodeGen/X86/GC/ark-spills.ll diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h index c5adc1b254bb..7016972cd57f 100644 --- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -122,6 +122,16 @@ private: struct StackObject { // The offset of this object from the stack pointer on entry to // the function. This field has no meaning for a variable sized element. + // OHOS_LOCAL begin + // For ArkSpills this field represents the offset from the FP: + // * LLVM IR Module should have a named metadata 'ark.frame.info' + // which contains frame size and offsets of caller saved registers. + // * During DAG generation, class StatepointLowering allocates new + // StackObjects and mark them as ArkSpill, plus it sets certain SPOffset + // for each StackObject according to aforementioned metadata. + // * Prolog/Epilog inserter checks the type of selected StackObject, for + // those marked as ArkSpill straightforward lowering is used: FP+SPOffset. 
+ // OHOS_LOCAL end int64_t SPOffset; // The size of this object on the stack. 0 means a variable sized object, @@ -147,6 +157,11 @@ private: /// register allocator. bool isStatepointSpillSlot = false; + // OHOS_LOCAL begin + /// If true, this stack slot is used to access ArkFrame slots + bool isArkSpillSlot = false; + // OHOS_LOCAL end + /// Identifier for stack memory type analagous to address space. If this is /// non-0, the meaning is target defined. Offsets cannot be directly /// compared between objects with different stack IDs. The object may not @@ -210,6 +225,11 @@ private: /// The list of stack objects allocated. std::vector Objects; + // OHOS_LOCAL begin + /// The amount of stack objects that will be pointed inside Ark's frame + unsigned NumArkSpills = 0; + // OHOS_LOCAL end + /// This contains the number of fixed objects contained on /// the stack. Because fixed objects are stored at a negative index in the /// Objects list, this is also the index to the 0th object in the list. @@ -433,6 +453,8 @@ public: /// Return the number of objects. 
unsigned getNumObjects() const { return Objects.size(); } + unsigned getNumArkSpills() const { return NumArkSpills; } // OHOS_LOCAL + /// Map a frame index into the local object block void mapLocalFrameObject(int ObjectIndex, int64_t Offset) { LocalFrameObjects.push_back(std::pair(ObjectIndex, Offset)); @@ -738,6 +760,14 @@ public: return Objects[ObjectIdx+NumFixedObjects].isStatepointSpillSlot; } + // OHOS_LOCAL begin + bool isArkSpillSlotObjectIndex(int ObjectIdx) const { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + return Objects[ObjectIdx+NumFixedObjects].isArkSpillSlot; + } + // OHOS_LOCAL end + /// \see StackID uint8_t getStackID(int ObjectIdx) const { return Objects[ObjectIdx+NumFixedObjects].StackID; @@ -774,6 +804,18 @@ public: assert(isStatepointSpillSlotObjectIndex(ObjectIdx) && "inconsistent"); } + // OHOS_LOCAL begin + void markAsArkSpillSlotObjectIndex(int ObjectIdx) { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + if (!Objects[ObjectIdx+NumFixedObjects].isArkSpillSlot) { + NumArkSpills++; + Objects[ObjectIdx+NumFixedObjects].isArkSpillSlot = true; + } + assert(isArkSpillSlotObjectIndex(ObjectIdx) && "inconsistent"); + } + // OHOS_LOCAL end + /// Create a new statically sized stack object, returning /// a nonnegative identifier to represent it. 
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index fc1188186ac4..958c5b9625c2 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -577,6 +577,14 @@ public: MachineFunction &operator=(const MachineFunction &) = delete; ~MachineFunction(); + // OHOS_LOCAL begin + unsigned getMaxArkSpills() const; + + int getArkSpillOffset(int ArgIdx) const; + + int getArkFrameSize() const; + // OHOS_LOCAL end + /// Reset the instance as if it was just created. void reset() { clear(); diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index 0e0d6b6292d7..6cd595e87e64 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -272,6 +272,19 @@ public: return false; } + // OHOS_LOCAL begin + /// Return true if the target implements spilling & restoring caller-saved + /// registers from Ark spill slots. + virtual bool supportsArkSpills() const { + return false; + } + + /// Return offset of Ark frame adaptation + virtual int getArkFrameAdaptationOffset(const MachineFunction &MF) const { + return 0; + } + // OHOS_LOCAL end + /// restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee /// saved registers and returns true if it isn't possible / profitable to do /// so by issuing a series of load instructions via loadRegToStackSlot(). 
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 6b481a374382..84a55c44e26d 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -236,6 +236,65 @@ MachineFunction::~MachineFunction() {
   clear();
 }
 
+// OHOS_LOCAL begin
+/// Return the number of Ark spill slots listed in the module's
+/// 'ark.frame.info' named metadata (the operand count of its offsets node),
+/// or 0 when the target frame lowering does not support Ark spills.
+unsigned MachineFunction::getMaxArkSpills() const {
+  const auto *TFI = getSubtarget().getFrameLowering();
+  if (!TFI->supportsArkSpills())
+    return 0;
+
+  const auto *Module = F.getParent();
+  auto *ArkFrameInfoMd = Module->getNamedMetadata("ark.frame.info");
+  assert(ArkFrameInfoMd != nullptr && "ArkSpills require ark.frame.info MD");
+
+  constexpr auto ArkInfoOffsetsIdx = 1U;
+  return ArkFrameInfoMd->getOperand(ArkInfoOffsetsIdx)->getNumOperands();
+}
+
+/// Read operand \p Idx1 of node \p Idx0 of \p MD as a sign-extended integer
+/// constant.
+static int64_t getConstantFromArkFrameMeta(NamedMDNode *MD, int Idx0,
+                                           int Idx1) {
+  // cast<> (not dyn_cast<>): the result is dereferenced unconditionally, so
+  // malformed metadata should trip cast's assertion rather than end up as a
+  // null pointer dereference.
+  auto *Meta = cast<ConstantAsMetadata>(MD->getOperand(Idx0)->getOperand(Idx1));
+  return cast<ConstantInt>(Meta->getValue())->getSExtValue();
+}
+
+/// Return the offset assigned to Ark spill slot \p ArgIdx: the adaptation
+/// size (node 0 of 'ark.frame.info') plus the slot's own offset (operand
+/// \p ArgIdx of node 1).
+int MachineFunction::getArkSpillOffset(int ArgIdx) const {
+  const auto *Module = F.getParent();
+  auto *ArkFrameInfoMd = Module->getNamedMetadata("ark.frame.info");
+  assert(ArkFrameInfoMd != nullptr && "ArkSpills require ark.frame.info MD");
+
+  // Extract the frame adaptation size
+  constexpr auto ArkInfoAdaptationId = 0U;
+  auto FrameAdaptationSize =
+      getConstantFromArkFrameMeta(ArkFrameInfoMd, ArkInfoAdaptationId, 0);
+
+  // And the slot's inner offset
+  constexpr auto ArkInfoOffsetsIdx = 1U;
+  auto InnerOffset =
+      getConstantFromArkFrameMeta(ArkFrameInfoMd, ArkInfoOffsetsIdx, ArgIdx);
+  return FrameAdaptationSize + InnerOffset;
+}
+
+/// Return the Ark frame size stored in node 2 of 'ark.frame.info'.
+int MachineFunction::getArkFrameSize() const {
+  const auto *Module = F.getParent();
+  auto *ArkFrameInfoMd = Module->getNamedMetadata("ark.frame.info");
+  assert(ArkFrameInfoMd != nullptr && "ArkSpills require ark.frame.info MD");
+
+  constexpr auto ArkInfoFrameSizeIdx = 2U;
+  return getConstantFromArkFrameMeta(ArkFrameInfoMd, ArkInfoFrameSizeIdx, 0);
+}
+// OHOS_LOCAL end
+
 void
MachineFunction::clear() { Properties.reset(); // Don't call destructors on MachineInstr and MachineOperand. All of their diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index b1caeefab81d..2cac1b55c3dd 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -671,6 +671,15 @@ void PEI::spillCalleeSavedRegs(MachineFunction &MF) { // Assign stack slots for any callee-saved registers that must be spilled. assignCalleeSavedSpillSlots(MF, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex); + // OHOS_LOCAL begin + bool NeedPadding = F.getMetadata("use-ark-frame") != nullptr; + NeedPadding &= TFI->supportsArkSpills(); + if (NeedPadding) { + auto FrameSize = MF.getArkFrameSize(); + MFI.CreateFixedObject(FrameSize, -FrameSize, false); + } + // OHOS_LOCAL end + // Add the code to save and restore the callee saved registers. if (!F.hasFnAttribute(Attribute::Naked)) { MFI.setCalleeSavedInfoValid(true); @@ -1112,6 +1121,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { continue; if (ProtectedObjs.count(i)) continue; + // OHOS_LOCAL begin + if (MFI.isArkSpillSlotObjectIndex(i)) + continue; + // OHOS_LOCAL end // Only allocate objects on the default stack. if (MFI.getStackID(i) != TargetStackID::Default) continue; @@ -1497,17 +1510,29 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, // implementation other than historical accident. The only // remaining difference is the unconditional use of the stack // pointer as the base register. 
+ MachineFrameInfo &MFI = MF.getFrameInfo(); // OHOS_LOCAL if (MI.getOpcode() == TargetOpcode::STATEPOINT) { assert((!MI.isDebugValue() || i == 0) && "Frame indicies can only appear as the first operand of a " "DBG_VALUE machine instruction"); Register Reg; MachineOperand &Offset = MI.getOperand(i + 1); - StackOffset refOffset = TFI->getFrameIndexReferencePreferSP( - MF, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false); - assert(!refOffset.getScalable() && - "Frame offsets with a scalable component are not supported"); - Offset.setImm(Offset.getImm() + refOffset.getFixed() + SPAdj); + // OHOS_LOCAL begin + int FI = MI.getOperand(i).getIndex(); + if (!MFI.isArkSpillSlotObjectIndex(FI) || !TFI->supportsArkSpills()) { + StackOffset refOffset = TFI->getFrameIndexReferencePreferSP( + MF, FI, Reg, /*IgnoreSPUpdates*/ false); + assert(!refOffset.getScalable() && + "Frame offsets with a scalable component are not supported"); + Offset.setImm(Offset.getImm() + refOffset.getFixed() + SPAdj); + } else { + // Ark Spills require only offset over FP + Reg = TRI.getFrameRegister(MF); + auto Adaptation = TFI->getArkFrameAdaptationOffset(MF); + Offset.setImm(MFI.getObjectOffset(FI) + Adaptation); + } + // OHOS_LOCAL end + MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/); continue; } diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index c5c093ae228f..03315c57aaa6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -32,8 +32,10 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetFrameLowering.h" // OHOS_LOCAL #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" // OHOS_LOCAL #include "llvm/IR/CallingConv.h" #include "llvm/IR/DerivedTypes.h" 
#include "llvm/IR/GCStrategy.h" @@ -106,9 +108,12 @@ void StatepointLoweringState::clear() { "cleared before statepoint sequence completed"); } +// OHOS_LOCAL begin SDValue StatepointLoweringState::allocateStackSlot(EVT ValueType, - SelectionDAGBuilder &Builder) { + SelectionDAGBuilder &Builder, + bool ArkSpill) { +// OHOS_LOCAL end NumSlotsAllocatedForStatepoints++; MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); @@ -127,17 +132,22 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, assert(AllocatedStackSlots.size() == Builder.FuncInfo.StatepointStackSlots.size() && "Broken invariant"); - - for (; NextSlotToAllocate < NumSlots; NextSlotToAllocate++) { - if (!AllocatedStackSlots.test(NextSlotToAllocate)) { - const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate]; - if (MFI.getObjectSize(FI) == SpillSize) { - AllocatedStackSlots.set(NextSlotToAllocate); - // TODO: Is ValueType the right thing to use here? - return Builder.DAG.getFrameIndex(FI, ValueType); + // OHOS_LOCAL begin + if (!ArkSpill) { + for (; NextSlotToAllocate < NumSlots; NextSlotToAllocate++) { + if (!AllocatedStackSlots.test(NextSlotToAllocate)) { + const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate]; + if (MFI.isArkSpillSlotObjectIndex(FI)) + continue; + if (MFI.getObjectSize(FI) == SpillSize) { + AllocatedStackSlots.set(NextSlotToAllocate); + // TODO: Is ValueType the right thing to use here? 
+ return Builder.DAG.getFrameIndex(FI, ValueType); + } } } } + // OHOS_LOCAL end // Couldn't find a free slot, so create a new one: @@ -154,6 +164,13 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, StatepointMaxSlotsRequired.updateMax( Builder.FuncInfo.StatepointStackSlots.size()); + // OHOS_LOCAL begin + if (ArkSpill) { + ArkFrameIndices.push_back(FI); + MFI.markAsArkSpillSlotObjectIndex(FI); + } + // OHOS_LOCAL end + return SpillSlot; } @@ -289,6 +306,16 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, if (!Index) return; + // OHOS_LOCAL begin + // Prevent using ArkSpill slots for reservation + // TODO: actually can use slot FI if at the current statepoint lowering + // will not use FI for saving argument + MachineFunction &MF = Builder.DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + if (MFI.isArkSpillSlotObjectIndex(*Index)) + return; + // OHOS_LOCAL end + const auto &StatepointSlots = Builder.FuncInfo.StatepointStackSlots; auto SlotIt = find(StatepointSlots, *Index); @@ -373,17 +400,27 @@ static MachineMemOperand* getMachineMemOperand(MachineFunction &MF, /// is a null constant. 
Return pair with first element being frame index /// containing saved value and second element with outgoing chain from the /// emitted store +// OHOS_LOCAL begin static std::tuple spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, - SelectionDAGBuilder &Builder) { + SelectionDAGBuilder &Builder, + Optional AssignedFI) { +// OHOS_LOCAL end SDValue Loc = Builder.StatepointLowering.getLocation(Incoming); MachineMemOperand* MMO = nullptr; // Emit new store if we didn't do it for this ptr before if (!Loc.getNode()) { - Loc = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(), - Builder); - int Index = cast(Loc)->getIndex(); + // OHOS_LOCAL begin + int Index; + if (!AssignedFI.has_value()) { + Loc = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(), + Builder); + Index = cast(Loc)->getIndex(); + } else { + Index = AssignedFI.value(); + } + // OHOS_LOCAL end // We use TargetFrameIndex so that isel will not select it into LEA Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy()); @@ -421,11 +458,14 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, /// Lower a single value incoming to a statepoint node. This value can be /// either a deopt value or a gc value, the handling is the same. We special /// case constants and allocas, then fall back to spilling if required. +// OHOS_LOCAL begin static void lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot, SmallVectorImpl &Ops, SmallVectorImpl &MemRefs, - SelectionDAGBuilder &Builder) { + SelectionDAGBuilder &Builder, + Optional AssignedFI) { +// OHOS_LOCAL end if (willLowerDirectly(Incoming)) { if (FrameIndexSDNode *FI = dyn_cast(Incoming)) { @@ -488,7 +528,9 @@ lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot, // will happily do so as needed, so doing it here would be a small compile // time win at most. 
SDValue Chain = Builder.getRoot(); - auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder); + // OHOS_LOCAL begin + auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder, AssignedFI); + // OHOS_LOCAL end Ops.push_back(std::get<0>(Res)); if (auto *MMO = std::get<2>(Res)) MemRefs.push_back(MMO); @@ -510,6 +552,70 @@ static bool isGCValue(const Value *V, SelectionDAGBuilder &Builder) { return true; // conservative } +// OHOS_LOCAL begin +/// Return a set of assigned Stack Slots for arguments that represents the GC +/// value. This function can assign Stack Slots only for functions marked +/// by ArkPlt calling convention. +static DenseMap +tryAssignStackSlots(SelectionDAGBuilder &Builder, const GCStatepointInst *Inst) { + DenseMap AssignedArkSlots; + + const auto &MF = Builder.DAG.getMachineFunction(); + const auto *TFI = MF.getSubtarget().getFrameLowering(); + if (!Inst || !Inst->hasFnAttr("use-ark-spills") || !TFI->supportsArkSpills()) + return AssignedArkSlots; // return an empty DenseMap + + // Save info about args that may be stored into Phys Regs during the + // call instruction lowering. Only args that represent GC reference + // can use ArkSpills. + unsigned AvailableArkSpills = MF.getMaxArkSpills(); + using VInfo = std::tuple; + SmallVector Args; + for (const Value *Arg : Inst->actual_args()) { + auto idx = Args.size(); + bool byVal = Inst->paramHasAttr(idx, Attribute::ByVal); + Args.emplace_back(Arg, isGCValue(Arg, Builder), byVal); + } + + // If there are no any gc value in a list of collected args, then exit. + auto GCRefOnReg = [](VInfo &Info) -> bool { return std::get<1>(Info); }; + if (std::none_of(Args.begin(), Args.end(), GCRefOnReg)) + return AssignedArkSlots; + + // Create ArkSpills if they were not created. 
+ auto &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); + auto &SL = Builder.StatepointLowering; + // If we did not allocate Ark spills for the MF do it + if (MFI.getNumArkSpills() == 0) { + // Clear previous frame indices + SL.dropArkSpills(); + } + for (unsigned I = SL.getArkSpillsCount(); I < AvailableArkSpills; ++I) { + constexpr bool RequireArkSpill = true; + auto FrameIndexTy = Builder.getFrameIndexTy(); + auto Loc = SL.allocateStackSlot(FrameIndexTy, Builder, RequireArkSpill); + int FI = cast(Loc)->getIndex(); + MFI.setObjectOffset(FI, MF.getArkSpillOffset(I)); + } + + // Assign Slots + unsigned ArgReg = 0; + for (auto Vinfo : Args) { + auto V = std::get<0>(Vinfo); + auto IsGCRef = std::get<1>(Vinfo); + auto HasByVal = std::get<2>(Vinfo); + if (IsGCRef && !HasByVal) + AssignedArkSlots[V] = SL.getArkSpillByIdx(ArgReg); + // Increment only if argument can be passed via GRP regs + ArgReg += V->getType()->isIntOrPtrTy() && !HasByVal; + if (ArgReg >= AvailableArkSpills) + break; + } + + return AssignedArkSlots; +} +// OHOS_LOCAL end + /// Lower deopt state and gc pointer arguments of the statepoint. The actual /// lowering is described in lowerIncomingStatepointValue. 
This function is /// responsible for lowering everything in the right position and playing some @@ -591,6 +697,7 @@ lowerStatepointMetaArgs(SmallVectorImpl &Ops, SmallSetVector LoweredGCPtrs; // Map lowered GC Pointer value to the index in above vector DenseMap GCPtrIndexMap; + DenseMap GCNodeToGCValue; // OHOS_LOCAL unsigned CurNumVRegs = 0; @@ -606,6 +713,7 @@ lowerStatepointMetaArgs(SmallVectorImpl &Ops, SDValue PtrSD = Builder.getValue(V); if (!LoweredGCPtrs.insert(PtrSD)) return; // skip duplicates + GCNodeToGCValue[PtrSD] = V; // OHOS_LOCAL GCPtrIndexMap[PtrSD] = LoweredGCPtrs.size() - 1; assert(!LowerAsVReg.count(PtrSD) && "must not have been seen"); @@ -638,25 +746,30 @@ lowerStatepointMetaArgs(SmallVectorImpl &Ops, return !(LiveInDeopt || UseRegistersForDeoptValues); }; + // OHOS_LOCAL begin + auto StatepointInst = dyn_cast_or_null(SI.StatepointInstr); + auto AssignedArkSlots = tryAssignStackSlots(Builder, StatepointInst); + // OHOS_LOCAL end + // Before we actually start lowering (and allocating spill slots for values), // reserve any stack slots which we judge to be profitable to reuse for a // particular value. This is purely an optimization over the code below and // doesn't change semantics at all. It is important for performance that we // reserve slots for both deopt and gc values before lowering either. 
for (const Value *V : SI.DeoptState) { - if (requireSpillSlot(V)) + if (requireSpillSlot(V) && !AssignedArkSlots.count(V)) // OHOS_LOCAL reservePreviousStackSlotForValue(V, Builder); } for (const Value *V : SI.Ptrs) { SDValue SDV = Builder.getValue(V); - if (!LowerAsVReg.count(SDV)) + if (!LowerAsVReg.count(SDV) && !AssignedArkSlots.count(V)) // OHOS_LOCAL reservePreviousStackSlotForValue(V, Builder); } for (const Value *V : SI.Bases) { SDValue SDV = Builder.getValue(V); - if (!LowerAsVReg.count(SDV)) + if (!LowerAsVReg.count(SDV) && !AssignedArkSlots.count(V)) // OHOS_LOCAL reservePreviousStackSlotForValue(V, Builder); } @@ -675,22 +788,36 @@ lowerStatepointMetaArgs(SmallVectorImpl &Ops, // the frame index. if (const Argument *Arg = dyn_cast(V)) { int FI = Builder.FuncInfo.getArgumentFrameIndex(Arg); - if (FI != INT_MAX) + if (FI != INT_MAX && !AssignedArkSlots.count(V)) // OHOS_LOCAL Incoming = Builder.DAG.getFrameIndex(FI, Builder.getFrameIndexTy()); } if (!Incoming.getNode()) Incoming = Builder.getValue(V); LLVM_DEBUG(dbgs() << "Value " << *V << " requireSpillSlot = " << requireSpillSlot(V) << "\n"); + // OHOS_LOCAL begin + Optional AssignedFI; + if (AssignedArkSlots.count(V)) { + AssignedFI = AssignedArkSlots[V]; + } + // OHOS_LOCAL end lowerIncomingStatepointValue(Incoming, requireSpillSlot(V), Ops, MemRefs, - Builder); + Builder, AssignedFI); // OHOS_LOCAL } // Finally, go ahead and lower all the gc arguments. pushStackMapConstant(Ops, Builder, LoweredGCPtrs.size()); - for (SDValue SDV : LoweredGCPtrs) + // OHOS_LOCAL begin + for (SDValue SDV : LoweredGCPtrs) { + Optional AssignedFI; + auto V = GCNodeToGCValue[SDV]; + if (AssignedArkSlots.count(V)) { + AssignedFI = AssignedArkSlots[V]; + } lowerIncomingStatepointValue(SDV, !LowerAsVReg.count(SDV), Ops, MemRefs, - Builder); + Builder, AssignedFI); + } + // OHOS_LOCAL end // Copy to out vector. LoweredGCPtrs will be empty after this point. 
GCPtrs = LoweredGCPtrs.takeVector(); diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h index addc0a7eef3a..867ab7baab84 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -85,9 +85,12 @@ public: // TODO: Should add consistency tracking to ensure we encounter // expected gc_result calls too. + // OHOS_LOCAL begin /// Get a stack slot we can use to store an value of type ValueType. This /// will hopefully be a recylced slot from another statepoint. - SDValue allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder); + SDValue allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder, + bool ArkSpill = false); + // OHOS_LOCAL end void reserveStackSlot(int Offset) { assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() && @@ -103,6 +106,20 @@ public: return AllocatedStackSlots.test(Offset); } + // OHOS_LOCAL begin + int getArkSpillsCount() const { + return ArkFrameIndices.size(); + } + + int getArkSpillByIdx(int idx) const { + return ArkFrameIndices[idx]; + } + + void dropArkSpills() { + ArkFrameIndices.clear(); + } + // OHOS_LOCAL end + private: /// Maps pre-relocation value (gc pointer directly incoming into statepoint) /// into it's location (currently only stack slots) @@ -117,6 +134,11 @@ private: /// Points just beyond the last slot known to have been allocated unsigned NextSlotToAllocate = 0; + // OHOS_LOCAL begin + /// A list for allocated spill slots that points into Ark Frame + SmallVector ArkFrameIndices; + // OHOS_LOCAL end + /// Keep track of pending gcrelocate calls for consistency check SmallVector PendingGCRelocateCalls; }; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 85e3426b049e..fa4bbadd0995 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -299,6 
+299,30 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF, return ArgumentPopSize; } +// OHOS_LOCAL begin +int AArch64FrameLowering::getArkFrameAdaptationOffset( + const MachineFunction &MF) const { + + const auto &F = MF.getFunction(); + if (F.getMetadata("use-ark-frame") != nullptr) + return 0x10; // FP & LR + + const auto &FRI = MF.getFrameInfo(); + const auto &CSI = FRI.getCalleeSavedInfo(); + auto End = CSI.end(); + auto FP_iter = std::find_if(CSI.begin(), End, [](auto &csi) { + return csi.getReg() == AArch64::FP; + }); + + assert(FP_iter != End && + "FP register has not been callee-saved as expected"); + + int32_t offset = FRI.getObjectOffset(FP_iter->getFrameIdx()); + assert(offset < 0 && "FP register is expected to be above SP"); + return -offset; +} +// OHOS_LOCAL end + static bool produceCompactUnwindFrame(MachineFunction &MF); static bool needsWinCFI(const MachineFunction &MF); static StackOffset getSVEStackSize(const MachineFunction &MF); @@ -3325,6 +3349,15 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo &MFI = MF.getFrameInfo(); + // OHOS_LOCAL begin + int ArkSpillNo = 0; + for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) + if (MFI.isArkSpillSlotObjectIndex(I)) { + MFI.setObjectOffset(I, MF.getArkSpillOffset(ArkSpillNo)); + ArkSpillNo++; + } + // OHOS_LOCAL end + assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown && "Upwards growing stack unsupported"); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index bab56e9f8645..8a3dbefb124e 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -70,6 +70,14 @@ public: /// Can this function use the red zone for local allocations. 
bool canUseRedZone(const MachineFunction &MF) const; + // OHOS_LOCAL begin + bool supportsArkSpills() const override { + return true; + } + + int getArkFrameAdaptationOffset(const MachineFunction &MF) const override; + // OHOS_LOCAL end + bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index f006386f7508..58b60c194487 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -89,6 +89,17 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const { MF.getInfo()->getHasPushSequences(); } +// OHOS_LOCAL begin +int +X86FrameLowering::getArkFrameAdaptationOffset(const MachineFunction &MF) const { + const auto &F = MF.getFunction(); + if (F.getMetadata("use-ark-frame") != nullptr) + return 0; + // FP + LR + return 0x10; +} +// OHOS_LOCAL end + /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. 
@@ -3867,6 +3878,15 @@ void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const { void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced( MachineFunction &MF, RegScavenger *RS) const { + // OHOS_LOCAL begin + auto &MFI = MF.getFrameInfo(); + int ArkSpillNo = 0; // ordinal among Ark spill slots seen so far + for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) + if (MFI.isArkSpillSlotObjectIndex(I)) { + MFI.setObjectOffset(I, MF.getArkSpillOffset(ArkSpillNo)); + ArkSpillNo++; + } + // OHOS_LOCAL end if (STI.is32Bit() && MF.hasEHFunclets()) restoreWinEHStackPointersInParent(MF); } diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h index 13176a290bbe..e6a6241fdda0 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -102,6 +102,14 @@ public: MutableArrayRef CSI, const TargetRegisterInfo *TRI) const override; + // OHOS_LOCAL begin + bool supportsArkSpills() const override { + return true; + } + + int getArkFrameAdaptationOffset(const MachineFunction &MF) const override; + // OHOS_LOCAL end + bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index abde64b61b85..381901e74d9f 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -843,6 +843,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); // OHOS_LOCAL MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); bool IsEHFuncletEpilogue = MBBI == MBB.end() ?
false : isFuncletReturnInstr(*MBBI); @@ -896,11 +897,28 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { assert(BasePtr == FramePtr && "Expected the FP as base register"); int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset; + // OHOS_LOCAL begin + if (MFI.isArkSpillSlotObjectIndex(FrameIndex)) { + assert(TFI->supportsArkSpills()); + auto Adaptation = TFI->getArkFrameAdaptationOffset(MF); + Offset = MFI.getObjectOffset(FrameIndex) + Adaptation; + } + // OHOS_LOCAL end MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); return; } if (MI.getOperand(FIOperandNum+3).isImm()) { + // OHOS_LOCAL begin + if (MFI.isArkSpillSlotObjectIndex(FrameIndex)) { + assert(BasePtr == FramePtr && "Expected the FP as base register"); + assert(TFI->supportsArkSpills()); + auto Adaptation = TFI->getArkFrameAdaptationOffset(MF); + int64_t Offset = MFI.getObjectOffset(FrameIndex) + Adaptation; + MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset); + return; // Ark slot resolved; skip the generic path below + } + // OHOS_LOCAL end // Offset is a 32-bit integer. int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm()); int Offset = FIOffset + Imm; diff --git a/llvm/test/CodeGen/AArch64/ark-spills.ll b/llvm/test/CodeGen/AArch64/ark-spills.ll new file mode 100644 index 000000000000..14ed3046a1b8 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/ark-spills.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s | FileCheck %s +; RUN: llc -opaque-pointers < %s | FileCheck %s +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +define weak void @foo(ptr addrspace(1) %ref) { + ret void +} + +define i32 @main(i32 %idx, ptr addrspace(1) %ref) #0 gc "statepoint-example" { + ; CHECK-LABEL: main: + ; CHECK: mov x0, x1 + ; CHECK-NEXT: str x1, [x29, #192] + ; CHECK-NEXT: bl foo + ; Check that the first argument will be saved into Ark Frame + %tok = call token (i64, i32, ptr, i32, i32, ...)
@llvm.experimental.gc.statepoint.p0(i64 12, i32 0, ptr elementtype(void (ptr addrspace(1))) @foo, i32 1, i32 0, ptr addrspace(1) %ref, i32 0, i32 0) #1 ["gc-live"(ptr addrspace(1) %ref)] + %ref.rel = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %tok, i32 0, i32 0) + ret i32 0 +} + +declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, ptr, i32 immarg, i32 immarg, ...) +declare ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token, i32 immarg, i32 immarg) + +attributes #0 = { nounwind "frame-pointer"="all" } +attributes #1 = { "use-ark-spills" } + +!ark.frame.info = !{!0, !1} +!0 = !{i32 304} +!1 = !{i32 -128} diff --git a/llvm/test/CodeGen/X86/GC/ark-spills.ll b/llvm/test/CodeGen/X86/GC/ark-spills.ll new file mode 100644 index 000000000000..89172c72d1f9 --- /dev/null +++ b/llvm/test/CodeGen/X86/GC/ark-spills.ll @@ -0,0 +1,27 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -opaque-pointers < %s | FileCheck %s + +define weak void @foo(ptr addrspace(1) %ref) { + ret void +} + +define i32 @main(i32 %idx, ptr addrspace(1) %ref) #0 gc "statepoint-example" { + ; CHECK-LABEL: main: + ; CHECK: movq %rsi, %rdi + ; CHECK-NEXT: movq %rsi, 192(%rbp) + ; CHECK-NEXT: callq foo@PLT + ; Check that the first argument will be saved into Ark Frame + %tok = call arkpltcc token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 12, i32 0, ptr elementtype(void (ptr addrspace(1))) @foo, i32 1, i32 0, ptr addrspace(1) %ref, i32 0, i32 0) #1 ["gc-live"(ptr addrspace(1) %ref)] + %ref.rel = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %tok, i32 0, i32 0) + ret i32 0 +} + +declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, ptr, i32 immarg, i32 immarg, ...)
+declare ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token, i32 immarg, i32 immarg) + +attributes #0 = { nounwind "frame-pointer"="all" } +attributes #1 = { "use-ark-spills" } + +!ark.frame.info = !{!0, !1} +!0 = !{i32 304} +!1 = !{i32 -128} -- Gitee