diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index eabc4aabea0628ecae8c46686b93cbbb85ccdd28..f09d1129b128a35b75eb2479de23bee88a953b3c 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1067,15 +1067,6 @@ void CodeGenModule::Release() { "sign-return-address-with-bkey", 1); } - if (CodeGenOpts.StackClashProtector) - getModule().addModuleFlag( - llvm::Module::Override, "probe-stack", - llvm::MDString::get(TheModule.getContext(), "inline-asm")); - - if (CodeGenOpts.StackProbeSize && CodeGenOpts.StackProbeSize != 4096) - getModule().addModuleFlag(llvm::Module::Min, "stack-probe-size", - CodeGenOpts.StackProbeSize); - if (!CodeGenOpts.MemoryProfileOutput.empty()) { llvm::LLVMContext &Ctx = TheModule.getContext(); getModule().addModuleFlag( @@ -2270,10 +2261,6 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, if (CodeGenOpts.StackClashProtector) B.addAttribute("probe-stack", "inline-asm"); - if (CodeGenOpts.StackProbeSize && CodeGenOpts.StackProbeSize != 4096) - B.addAttribute("stack-probe-size", - std::to_string(CodeGenOpts.StackProbeSize)); - if (!hasUnwindExceptions(LangOpts)) B.addAttribute(llvm::Attribute::NoUnwind); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 7e78e4d8d3516b27b51ff5284cf1401e267a305f..e55cee095e3281ae8de58dde1a9511d93801e162 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3479,7 +3479,7 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args, return; if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() && - !EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64()) + !EffectiveTriple.isPPC64()) return; Args.addOptInFlag(CmdArgs, options::OPT_fstack_clash_protection, diff --git a/clang/test/CodeGen/stack-clash-protection.c b/clang/test/CodeGen/stack-clash-protection.c index dab9ee768c28745040923b8a5bba85f97755db26..67571f5cdb2c14c3bf92efe60a6be4a47e297a8d 100644 --- a/clang/test/CodeGen/stack-clash-protection.c +++ b/clang/test/CodeGen/stack-clash-protection.c @@ -1,9 +1,8 @@ // Check the correct function attributes are generated -// RUN: %clang_cc1 -triple x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s -// RUN: %clang_cc1 -triple s390x-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s -// RUN: %clang_cc1 -triple powerpc64le-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s -// RUN: %clang_cc1 -triple powerpc64-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s +// RUN: %clang_cc1 -triple s390x-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64le-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s // CHECK: define{{.*}} void @large_stack() #[[A:.*]] { void large_stack(void) { @@ -12,18 +11,15 @@ void large_stack(void) { stack[i] = i; } -// CHECK: define{{.*}} void @vla({{.*}}) #[[A]] { +// CHECK: define{{.*}} void @vla({{.*}}) #[[A:.*]] { void vla(int n) { volatile int vla[n]; __builtin_memset(&vla[0], 0, 1); } -// CHECK: define{{.*}} void @builtin_alloca({{.*}}) #[[A]] { +// CHECK: define{{.*}} void @builtin_alloca({{.*}}) #[[A:.*]] { void builtin_alloca(int n) { volatile void *mem = __builtin_alloca(n); } -// CHECK: attributes #[[A]] = {{.*}}"probe-stack"="inline-asm" {{.*}}"stack-probe-size"="8192" - -// CHECK: !{i32 4, !"probe-stack", !"inline-asm"} -// CHECK: !{i32 8, !"stack-probe-size", i32 8192} +// CHECK: attributes #[[A]] = {{.*}} "probe-stack"="inline-asm" diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 7abbd1f03f16324456ef45d95df7dc842a54ded3..a568edd0e640d93873a19adf5872ef04428f82bd 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -400,11 +400,7 @@ public: LegalizeResult lowerUnmergeValues(MachineInstr &MI); LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI); LegalizeResult lowerShuffleVector(MachineInstr &MI); - Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, - Align Alignment, LLT PtrTy); LegalizeResult lowerDynStackAlloc(MachineInstr &MI); - LegalizeResult lowerStackSave(MachineInstr &MI); - LegalizeResult lowerStackRestore(MachineInstr &MI); LegalizeResult lowerExtract(MachineInstr &MI); LegalizeResult lowerInsert(MachineInstr &MI); LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI); diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index c92ce6dc701c8ed8c3ef1b276ed0c8dbad04128e..186bea75ae96475a7980736873a061ee07e731d9 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -763,12 +763,6 @@ HANDLE_TARGET_OPCODE(G_JUMP_TABLE) /// Generic dynamic stack allocation. HANDLE_TARGET_OPCODE(G_DYN_STACKALLOC) -/// Generic stack pointer save. -HANDLE_TARGET_OPCODE(G_STACKSAVE) - -/// Generic stack pointer restore. -HANDLE_TARGET_OPCODE(G_STACKRESTORE) - /// Strict floating point instructions. HANDLE_TARGET_OPCODE(G_STRICT_FADD) HANDLE_TARGET_OPCODE(G_STRICT_FSUB) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index e8cfaeab3cd805ca6a2fbda51c1fabd29190390a..00d56d1c4bd55ced90f6f3cacadfd86f6f78ef51 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -225,18 +225,6 @@ def G_DYN_STACKALLOC : GenericInstruction { let hasSideEffects = true; } -def G_STACKSAVE : GenericInstruction { - let OutOperandList = (outs ptype0:$dst); - let InOperandList = (ins); - let hasSideEffects = true; -} - -def G_STACKRESTORE : GenericInstruction { - let OutOperandList = (outs); - let InOperandList = (ins ptype0:$src); - let hasSideEffects = true; -} - def G_FREEZE : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index e4b837c6b8ce90ec1ddd7daf69478e86d1179104..9a67a8d05a4dda80b078d1ad48de6299a6a00276 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2229,12 +2229,31 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return true; } case Intrinsic::stacksave: { - MIRBuilder.buildInstr(TargetOpcode::G_STACKSAVE, {getOrCreateVReg(CI)}, {}); + // Save the stack pointer to the location provided by the intrinsic. + Register Reg = getOrCreateVReg(CI); + Register StackPtr = MF->getSubtarget() + .getTargetLowering() + ->getStackPointerRegisterToSaveRestore(); + + // If the target doesn't specify a stack pointer, then fall back. + if (!StackPtr) + return false; + + MIRBuilder.buildCopy(Reg, StackPtr); return true; } case Intrinsic::stackrestore: { - MIRBuilder.buildInstr(TargetOpcode::G_STACKRESTORE, {}, - {getOrCreateVReg(*CI.getArgOperand(0))}); + // Restore the stack pointer from the location provided by the intrinsic. + Register Reg = getOrCreateVReg(*CI.getArgOperand(0)); + Register StackPtr = MF->getSubtarget() + .getTargetLowering() + ->getStackPointerRegisterToSaveRestore(); + + // If the target doesn't specify a stack pointer, then fall back. + if (!StackPtr) + return false; + + MIRBuilder.buildCopy(StackPtr, Reg); return true; } case Intrinsic::cttz: diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 5557456e706d2a97699d82680dab8fd808d552b4..f0da0d88140f2d1a51fe62a8199161206517bdbe 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3503,10 +3503,6 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerShuffleVector(MI); case G_DYN_STACKALLOC: return lowerDynStackAlloc(MI); - case G_STACKSAVE: - return lowerStackSave(MI); - case G_STACKRESTORE: - return lowerStackRestore(MI); case G_EXTRACT: return lowerExtract(MI); case G_INSERT: @@ -6777,12 +6773,21 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { return Legalized; } -Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg, - Register AllocSize, - Align Alignment, - LLT PtrTy) { +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { + const auto &MF = *MI.getMF(); + const auto &TFI = *MF.getSubtarget().getFrameLowering(); + if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp) + return UnableToLegalize; + + Register Dst = MI.getOperand(0).getReg(); + Register AllocSize = MI.getOperand(1).getReg(); + Align Alignment = assumeAligned(MI.getOperand(2).getImm()); + + LLT PtrTy = MRI.getType(Dst); LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits()); + Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg); SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp); @@ -6797,25 +6802,7 @@ Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg, Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst); } - return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0); -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { - const auto &MF = *MI.getMF(); - const auto &TFI = *MF.getSubtarget().getFrameLowering(); - if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp) - return UnableToLegalize; - - Register Dst = MI.getOperand(0).getReg(); - Register AllocSize = MI.getOperand(1).getReg(); - Align Alignment = assumeAligned(MI.getOperand(2).getImm()); - - LLT PtrTy = MRI.getType(Dst); - Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); - Register SPTmp = - getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy); - + SPTmp = MIRBuilder.buildCast(PtrTy, Alloc); MIRBuilder.buildCopy(SPReg, SPTmp); MIRBuilder.buildCopy(Dst, SPTmp); @@ -6823,28 +6810,6 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { return Legalized; } -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerStackSave(MachineInstr &MI) { - Register StackPtr = TLI.getStackPointerRegisterToSaveRestore(); - if (!StackPtr) - return UnableToLegalize; - - MIRBuilder.buildCopy(MI.getOperand(0), StackPtr); - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerStackRestore(MachineInstr &MI) { - Register StackPtr = TLI.getStackPointerRegisterToSaveRestore(); - if (!StackPtr) - return UnableToLegalize; - - MIRBuilder.buildCopy(StackPtr, MI.getOperand(0)); - MI.eraseFromParent(); - return Legalized; -} - LegalizerHelper::LegalizeResult LegalizerHelper::lowerExtract(MachineInstr &MI) { auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index fe21173f531fd72ab31808b51e9302af1349ae54..4d5676f341017270c556407795a2b5b209b0692e 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -300,7 +300,6 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF, static bool produceCompactUnwindFrame(MachineFunction &MF); static bool needsWinCFI(const MachineFunction &MF); static StackOffset getSVEStackSize(const MachineFunction &MF); -static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB); static bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF); /// Returns true if a homogeneous prolog or epilog code can be emitted @@ -462,11 +461,6 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { /// included as part of the stack frame. bool AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { - // The stack probing code for the dynamically allocated outgoing arguments - // area assumes that the stack is probed at the top - either by the prologue - // code, which issues a probe if `hasVarSizedObjects` return true, or by the - // most recent variable-sized object allocation. Changing the condition here - // may need to be followed up by changes to the probe issuing logic. return !MF.getFrameInfo().hasVarSizedObjects(); } @@ -475,9 +469,6 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr( MachineBasicBlock::iterator I) const { const AArch64InstrInfo *TII = static_cast(MF.getSubtarget().getInstrInfo()); - const AArch64TargetLowering *TLI = - MF.getSubtarget().getTargetLowering(); - MachineFrameInfo &MFI = MF.getFrameInfo(); DebugLoc DL = I->getDebugLoc(); unsigned Opc = I->getOpcode(); bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); @@ -504,24 +495,6 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr( // Most call frames will be allocated at the start of a function so // this is OK, but it is a limitation that needs dealing with. assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large"); - - if (TLI->hasInlineStackProbe(MF) && - -Amount >= AArch64::StackProbeMaxUnprobedStack) { - // When stack probing is enabled, the decrement of SP may need to be - // probed. We only need to do this if the call site needs 1024 bytes of - // space or more, because a region smaller than that is allowed to be - // unprobed at an ABI boundary. We rely on the fact that SP has been - // probed exactly at this point, either by the prologue or most recent - // dynamic allocation. - assert(MFI.hasVarSizedObjects() && - "non-reserved call frame without var sized objects?"); - Register ScratchReg = - MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); - inlineStackProbeFixed(I, ScratchReg, -Amount, StackOffset::get(0, 0)); - } else { - emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(Amount), TII); - } emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, StackOffset::getFixed(Amount), TII); } @@ -698,154 +671,6 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores( emitCalleeSavedRestores(MBB, MBBI, true); } -// Return the maximum possible number of bytes for `Size` due to the -// architectural limit on the size of a SVE register. -static int64_t upperBound(StackOffset Size) { - static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16; - return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed(); -} - -void AArch64FrameLowering::allocateStackSpace( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI, - bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset, - bool FollowupAllocs) const { - - if (!AllocSize) - return; - - DebugLoc DL; - MachineFunction &MF = *MBB.getParent(); - const AArch64Subtarget &Subtarget = MF.getSubtarget(); - const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); - AArch64FunctionInfo &AFI = *MF.getInfo(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - - const int64_t MaxAlign = MFI.getMaxAlign().value(); - const uint64_t AndMask = ~(MaxAlign - 1); - - if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) { - Register TargetReg = RealignmentPadding - ? findScratchNonCalleeSaveRegister(&MBB) - : AArch64::SP; - // SUB Xd/SP, SP, AllocSize - emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI, - EmitCFI, InitialOffset); - - if (RealignmentPadding) { - // AND SP, X9, 0b11111...0000 - BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP) - .addReg(TargetReg, RegState::Kill) - .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64)) - .setMIFlags(MachineInstr::FrameSetup); - AFI.setStackRealigned(true); - - // No need for SEH instructions here; if we're realigning the stack, - // we've set a frame pointer and already finished the SEH prologue. - assert(!NeedsWinCFI); - } - return; - } - - // - // Stack probing allocation. - // - - // Fixed length allocation. If we don't need to re-align the stack and don't - // have SVE objects, we can use a more efficient sequence for stack probing. - if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) { - Register ScratchReg = findScratchNonCalleeSaveRegister(&MBB); - assert(ScratchReg != AArch64::NoRegister); - BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC)) - .addDef(ScratchReg) - .addImm(AllocSize.getFixed()) - .addImm(InitialOffset.getFixed()) - .addImm(InitialOffset.getScalable()); - // The fixed allocation may leave unprobed bytes at the top of the - // stack. If we have subsequent alocation (e.g. if we have variable-sized - // objects), we need to issue an extra probe, so these allocations start in - // a known state. - if (FollowupAllocs) { - // STR XZR, [SP] - BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui)) - .addReg(AArch64::XZR) - .addReg(AArch64::SP) - .addImm(0) - .setMIFlags(MachineInstr::FrameSetup); - } - - return; - } - - // Variable length allocation. - - // If the (unknown) allocation size cannot exceed the probe size, decrement - // the stack pointer right away. - int64_t ProbeSize = AFI.getStackProbeSize(); - if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) { - Register ScratchReg = RealignmentPadding - ? findScratchNonCalleeSaveRegister(&MBB) - : AArch64::SP; - assert(ScratchReg != AArch64::NoRegister); - // SUB Xd, SP, AllocSize - emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, &TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI, - EmitCFI, InitialOffset); - if (RealignmentPadding) { - // AND SP, Xn, 0b11111...0000 - BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP) - .addReg(ScratchReg, RegState::Kill) - .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64)) - .setMIFlags(MachineInstr::FrameSetup); - AFI.setStackRealigned(true); - } - if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding > - AArch64::StackProbeMaxUnprobedStack) { - // STR XZR, [SP] - BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui)) - .addReg(AArch64::XZR) - .addReg(AArch64::SP) - .addImm(0) - .setMIFlags(MachineInstr::FrameSetup); - } - return; - } - - // Emit a variable-length allocation probing loop. - // TODO: As an optimisation, the loop can be "unrolled" into a few parts, - // each of them guaranteed to adjust the stack by less than the probe size. - Register TargetReg = findScratchNonCalleeSaveRegister(&MBB); - assert(TargetReg != AArch64::NoRegister); - // SUB Xd, SP, AllocSize - emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI, - EmitCFI, InitialOffset); - - if (RealignmentPadding) { - // AND Xn, Xn, 0b11111...0000 - BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), TargetReg) - .addReg(TargetReg, RegState::Kill) - .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64)) - .setMIFlags(MachineInstr::FrameSetup); - } - - BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC_VAR)) - .addReg(TargetReg); - if (EmitCFI) { - // Set the CFA register back to SP. - unsigned Reg = - Subtarget.getRegisterInfo()->getDwarfRegNum(AArch64::SP, true); - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - } - if (RealignmentPadding) - AFI.setStackRealigned(true); -} - static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) { switch (Reg.id()) { default: @@ -1029,11 +854,9 @@ bool AArch64FrameLowering::canUseAsPrologue( MachineBasicBlock *TmpMBB = const_cast(&MBB); const AArch64Subtarget &Subtarget = MF->getSubtarget(); const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); - const AArch64TargetLowering *TLI = Subtarget.getTargetLowering(); - // Don't need a scratch register if we're not going to re-align the stack or - // emit stack probes. - if (!RegInfo->hasStackRealignment(*MF) && TLI->hasInlineStackProbe(*MF)) + // Don't need a scratch register if we're not going to re-align the stack. + if (!RegInfo->hasStackRealignment(*MF)) return true; // Otherwise, we can use any block as long as it has a scratch register // available. @@ -1043,11 +866,15 @@ bool AArch64FrameLowering::canUseAsPrologue( static bool windowsRequiresStackProbe(MachineFunction &MF, uint64_t StackSizeInBytes) { const AArch64Subtarget &Subtarget = MF.getSubtarget(); - const AArch64FunctionInfo &MFI = *MF.getInfo(); + if (!Subtarget.isTargetWindows()) + return false; + const Function &F = MF.getFunction(); // TODO: When implementing stack protectors, take that into account // for the probe threshold. - return Subtarget.isTargetWindows() && MFI.hasStackProbing() && - StackSizeInBytes >= uint64_t(MFI.getStackProbeSize()); + unsigned StackProbeSize = + F.getFnAttributeAsParsedInteger("stack-probe-size", 4096); + return (StackSizeInBytes >= StackProbeSize) && + !F.hasFnAttribute("no-stack-arg-probe"); } static bool needsWinCFI(const MachineFunction &MF) { @@ -1812,7 +1639,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // Alignment is required for the parent frame, not the funclet const bool NeedsRealignment = NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF); - const int64_t RealignmentPadding = + int64_t RealignmentPadding = (NeedsRealignment && MFI.getMaxAlign() > Align(16)) ? MFI.getMaxAlign().value() - 16 : 0; @@ -1942,14 +1769,12 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, } } - StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize; + StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {}; MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI; // Process the SVE callee-saves to determine what space needs to be // allocated. if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) { - LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize - << "\n"); // Find callee save instructions in frame. CalleeSavesBegin = MBBI; assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction"); @@ -1957,34 +1782,67 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, ++MBBI; CalleeSavesEnd = MBBI; - SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize); - SVELocalsSize = SVEStackSize - SVECalleeSavesSize; + AllocateBefore = StackOffset::getScalable(CalleeSavedSize); + AllocateAfter = SVEStackSize - AllocateBefore; } // Allocate space for the callee saves (if any). - StackOffset CFAOffset = - StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes); - StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes); - allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false, - nullptr, EmitAsyncCFI && !HasFP, CFAOffset, - MFI.hasVarSizedObjects() || LocalsSize); - CFAOffset += SVECalleeSavesSize; + emitFrameOffset( + MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII, + MachineInstr::FrameSetup, false, false, nullptr, + EmitAsyncCFI && !HasFP && AllocateBefore, + StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes)); if (EmitAsyncCFI) emitCalleeSavedSVELocations(MBB, CalleeSavesEnd); - // Allocate space for the rest of the frame including SVE locals. Align the - // stack as necessary. - assert(!(canUseRedZone(MF) && NeedsRealignment) && - "Cannot use redzone with stack realignment"); - if (!canUseRedZone(MF)) { - // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have - // the correct value here, as NumBytes also includes padding bytes, - // which shouldn't be counted here. - allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding, - SVELocalsSize + StackOffset::getFixed(NumBytes), - NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP, - CFAOffset, MFI.hasVarSizedObjects()); + // Finally allocate remaining SVE stack space. + emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP, + -AllocateAfter, TII, MachineInstr::FrameSetup, false, false, + nullptr, EmitAsyncCFI && !HasFP && AllocateAfter, + AllocateBefore + StackOffset::getFixed( + (int64_t)MFI.getStackSize() - NumBytes)); + + // Allocate space for the rest of the frame. + if (NumBytes) { + unsigned scratchSPReg = AArch64::SP; + + if (NeedsRealignment) { + scratchSPReg = findScratchNonCalleeSaveRegister(&MBB); + assert(scratchSPReg != AArch64::NoRegister); + } + + // If we're a leaf function, try using the red zone. + if (!canUseRedZone(MF)) { + // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have + // the correct value here, as NumBytes also includes padding bytes, + // which shouldn't be counted here. + emitFrameOffset( + MBB, MBBI, DL, scratchSPReg, AArch64::SP, + StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup, + false, NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP, + SVEStackSize + + StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes)); + } + if (NeedsRealignment) { + assert(MFI.getMaxAlign() > Align(1)); + assert(scratchSPReg != AArch64::SP); + + // SUB X9, SP, NumBytes + // -- X9 is temporary register, so shouldn't contain any live data here, + // -- free to use. This is already produced by emitFrameOffset above. + // AND SP, X9, 0b11111...0000 + uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1); + + BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP) + .addReg(scratchSPReg, RegState::Kill) + .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64)); + AFI->setStackRealigned(true); + + // No need for SEH instructions here; if we're realigning the stack, + // we've set a frame pointer and already finished the SEH prologue. + assert(!NeedsWinCFI); + } } // If we need a base pointer, set it up here. It's whatever the value of the @@ -4166,170 +4024,3 @@ void AArch64FrameLowering::orderFrameObjects( dbgs() << "\n"; }); } - -/// Emit a loop to decrement SP until it is equal to TargetReg, with probes at -/// least every ProbeSize bytes. Returns an iterator of the first instruction -/// after the loop. The difference between SP and TargetReg must be an exact -/// multiple of ProbeSize. -MachineBasicBlock::iterator -AArch64FrameLowering::inlineStackProbeLoopExactMultiple( - MachineBasicBlock::iterator MBBI, int64_t ProbeSize, - Register TargetReg) const { - MachineBasicBlock &MBB = *MBBI->getParent(); - MachineFunction &MF = *MBB.getParent(); - const AArch64InstrInfo *TII = - MF.getSubtarget().getInstrInfo(); - DebugLoc DL = MBB.findDebugLoc(MBBI); - - MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); - MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock()); - MF.insert(MBBInsertPoint, LoopMBB); - MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock()); - MF.insert(MBBInsertPoint, ExitMBB); - - // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not encodable - // in SUB). - emitFrameOffset(*LoopMBB, LoopMBB->end(), DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(-ProbeSize), TII, - MachineInstr::FrameSetup); - // STR XZR, [SP] - BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::STRXui)) - .addReg(AArch64::XZR) - .addReg(AArch64::SP) - .addImm(0) - .setMIFlags(MachineInstr::FrameSetup); - // CMP SP, TargetReg - BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::SUBSXrx64), - AArch64::XZR) - .addReg(AArch64::SP) - .addReg(TargetReg) - .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0)) - .setMIFlags(MachineInstr::FrameSetup); - // B.CC Loop - BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::Bcc)) - .addImm(AArch64CC::NE) - .addMBB(LoopMBB) - .setMIFlags(MachineInstr::FrameSetup); - - LoopMBB->addSuccessor(ExitMBB); - LoopMBB->addSuccessor(LoopMBB); - // Synthesize the exit MBB. - ExitMBB->splice(ExitMBB->end(), &MBB, MBBI, MBB.end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); - MBB.addSuccessor(LoopMBB); - // Update liveins. - recomputeLiveIns(*LoopMBB); - recomputeLiveIns(*ExitMBB); - - return ExitMBB->begin(); -} - -void AArch64FrameLowering::inlineStackProbeFixed( - MachineBasicBlock::iterator MBBI, Register ScratchReg, int64_t FrameSize, - StackOffset CFAOffset) const { - MachineBasicBlock *MBB = MBBI->getParent(); - MachineFunction &MF = *MBB->getParent(); - const AArch64InstrInfo *TII = - MF.getSubtarget().getInstrInfo(); - AArch64FunctionInfo *AFI = MF.getInfo(); - bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF); - bool HasFP = hasFP(MF); - - DebugLoc DL; - int64_t ProbeSize = MF.getInfo()->getStackProbeSize(); - int64_t NumBlocks = FrameSize / ProbeSize; - int64_t ResidualSize = FrameSize % ProbeSize; - - LLVM_DEBUG(dbgs() << "Stack probing: total " << FrameSize << " bytes, " - << NumBlocks << " blocks of " << ProbeSize - << " bytes, plus " << ResidualSize << " bytes\n"); - - // Decrement SP by NumBlock * ProbeSize bytes, with either unrolled or - // ordinary loop. - if (NumBlocks <= AArch64::StackProbeMaxLoopUnroll) { - for (int i = 0; i < NumBlocks; ++i) { - // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not - // encodable in a SUB). - emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(-ProbeSize), TII, - MachineInstr::FrameSetup, false, false, nullptr, - EmitAsyncCFI && !HasFP, CFAOffset); - CFAOffset += StackOffset::getFixed(ProbeSize); - // STR XZR, [SP] - BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui)) - .addReg(AArch64::XZR) - .addReg(AArch64::SP) - .addImm(0) - .setMIFlags(MachineInstr::FrameSetup); - } - } else if (NumBlocks != 0) { - // SUB ScratchReg, SP, #FrameSize (or equivalent if FrameSize is not - // encodable in ADD). ScrathReg may temporarily become the CFA register. - emitFrameOffset(*MBB, MBBI, DL, ScratchReg, AArch64::SP, - StackOffset::getFixed(-ProbeSize * NumBlocks), TII, - MachineInstr::FrameSetup, false, false, nullptr, - EmitAsyncCFI && !HasFP, CFAOffset); - CFAOffset += StackOffset::getFixed(ProbeSize * NumBlocks); - MBBI = inlineStackProbeLoopExactMultiple(MBBI, ProbeSize, ScratchReg); - MBB = MBBI->getParent(); - if (EmitAsyncCFI && !HasFP) { - // Set the CFA register back to SP. - const AArch64RegisterInfo &RegInfo = - *MF.getSubtarget().getRegisterInfo(); - unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true); - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); - BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - } - } - - if (ResidualSize != 0) { - // SUB SP, SP, #ResidualSize (or equivalent if ResidualSize is not encodable - // in SUB). - emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(-ResidualSize), TII, - MachineInstr::FrameSetup, false, false, nullptr, - EmitAsyncCFI && !HasFP, CFAOffset); - if (ResidualSize > AArch64::StackProbeMaxUnprobedStack) { - // STR XZR, [SP] - BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui)) - .addReg(AArch64::XZR) - .addReg(AArch64::SP) - .addImm(0) - .setMIFlags(MachineInstr::FrameSetup); - } - } -} - -void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF, - MachineBasicBlock &MBB) const { - // Get the instructions that need to be replaced. We emit at most two of - // these. Remember them in order to avoid complications coming from the need - // to traverse the block while potentially creating more blocks. - SmallVector ToReplace; - for (MachineInstr &MI : MBB) - if (MI.getOpcode() == AArch64::PROBED_STACKALLOC || - MI.getOpcode() == AArch64::PROBED_STACKALLOC_VAR) - ToReplace.push_back(&MI); - - for (MachineInstr *MI : ToReplace) { - if (MI->getOpcode() == AArch64::PROBED_STACKALLOC) { - Register ScratchReg = MI->getOperand(0).getReg(); - int64_t FrameSize = MI->getOperand(1).getImm(); - StackOffset CFAOffset = StackOffset::get(MI->getOperand(2).getImm(), - MI->getOperand(3).getImm()); - inlineStackProbeFixed(MI->getIterator(), ScratchReg, FrameSize, - CFAOffset); - } else { - assert(MI->getOpcode() == AArch64::PROBED_STACKALLOC_VAR && - "Stack probe pseudo-instruction expected"); - const AArch64InstrInfo *TII = - MI->getMF()->getSubtarget().getInstrInfo(); - Register TargetReg = MI->getOperand(0).getReg(); - (void)TII->probedStackAlloc(MI->getIterator(), TargetReg, true); - } - MI->eraseFromParent(); - } -} \ No newline at end of file diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 941af03a78b738703e60bc6e6fe57ea80dbac71a..147b5c181be5e53788804c5aafc00282e5b02f73 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -150,28 +150,10 @@ private: MachineBasicBlock::iterator MBBI) const; void emitCalleeSavedSVERestores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const; - void allocateStackSpace(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - int64_t RealignmentPadding, StackOffset AllocSize, - bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI, - StackOffset InitialOffset, bool FollowupAllocs) const; /// Emit target zero call-used regs. void emitZeroCallUsedRegs(BitVector RegsToZero, MachineBasicBlock &MBB) const override; - - /// Replace a StackProbe stub (if any) with the actual probe code inline - void inlineStackProbe(MachineFunction &MF, - MachineBasicBlock &PrologueMBB) const override; - - void inlineStackProbeFixed(MachineBasicBlock::iterator MBBI, - Register ScratchReg, int64_t FrameSize, - StackOffset CFAOffset) const; - - MachineBasicBlock::iterator - inlineStackProbeLoopExactMultiple(MachineBasicBlock::iterator MBBI, - int64_t NegProbeSize, - Register TargetReg) const; }; } // End llvm namespace diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f042f4f9df5d98cd8a9c758d8eb7f2e7b9db5652..0cc5e7fc5cc3398e1de637aa298e699adbf1d790 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -556,7 +556,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); + if (Subtarget->isTargetWindows()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); // Constant pool entries setOperationAction(ISD::ConstantPool, MVT::i64, Custom); @@ -2290,7 +2293,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::CSINC) MAKE_CASE(AArch64ISD::THREAD_POINTER) MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ) - MAKE_CASE(AArch64ISD::PROBED_ALLOCA) MAKE_CASE(AArch64ISD::ABDS_PRED) MAKE_CASE(AArch64ISD::ABDU_PRED) MAKE_CASE(AArch64ISD::HADDS_PRED) @@ -2649,22 +2651,6 @@ MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet( return BB; } -MachineBasicBlock * -AArch64TargetLowering::EmitDynamicProbedAlloc(MachineInstr &MI, - MachineBasicBlock *MBB) const { - MachineFunction &MF = *MBB->getParent(); - MachineBasicBlock::iterator MBBI = MI.getIterator(); - DebugLoc DL = MBB->findDebugLoc(MBBI); - const AArch64InstrInfo &TII = - *MF.getSubtarget().getInstrInfo(); - Register TargetReg = MI.getOperand(0).getReg(); - MachineBasicBlock::iterator NextInst = - TII.probedStackAlloc(MBBI, TargetReg, false); - - MI.eraseFromParent(); - return NextInst->getParent(); -} - MachineBasicBlock * AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg, MachineInstr &MI, @@ -2793,8 +2779,6 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( case AArch64::CATCHRET: return EmitLoweredCatchRet(MI, BB); - case AArch64::PROBED_STACKALLOC_DYN: - return EmitDynamicProbedAlloc(MI, BB); case AArch64::LD1_MXIPXX_H_PSEUDO_B: return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB); case AArch64::LD1_MXIPXX_H_PSEUDO_H: @@ -13687,34 +13671,9 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op, AN->getMemOperand()); } -SDValue -AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, - SelectionDAG &DAG) const { - +SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC( + SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const { SDLoc dl(Op); - // Get the inputs. - SDNode *Node = Op.getNode(); - SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); - MaybeAlign Align = - cast(Op.getOperand(2))->getMaybeAlignValue(); - EVT VT = Node->getValueType(0); - - if (DAG.getMachineFunction().getFunction().hasFnAttribute( - "no-stack-arg-probe")) { - SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64); - Chain = SP.getValue(1); - SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size); - if (Align) - SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0), - DAG.getConstant(-(uint64_t)Align->value(), dl, VT)); - Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP); - SDValue Ops[2] = {SP, Chain}; - return DAG.getMergeValues(Ops, dl); - } - - Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Callee = DAG.getTargetExternalSymbol(Subtarget->getChkStkName(), PtrVT, 0); @@ -13738,59 +13697,7 @@ AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size, DAG.getConstant(4, dl, MVT::i64)); - - SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64); - Chain = SP.getValue(1); - SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size); - if (Align) - SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0), - DAG.getConstant(-(uint64_t)Align->value(), dl, VT)); - Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP); - - Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl); - - SDValue Ops[2] = {SP, Chain}; - return DAG.getMergeValues(Ops, dl); -} - -SDValue -AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(SDValue Op, - SelectionDAG &DAG) const { - // Get the inputs. - SDNode *Node = Op.getNode(); - SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); - - MaybeAlign Align = - cast(Op.getOperand(2))->getMaybeAlignValue(); - SDLoc dl(Op); - EVT VT = Node->getValueType(0); - - // Construct the new SP value in a GPR. - SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64); - Chain = SP.getValue(1); - SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size); - if (Align) - SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0), - DAG.getConstant(-(uint64_t)Align->value(), dl, VT)); - - // Set the real SP to the new value with a probing loop. - Chain = DAG.getNode(AArch64ISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP); - SDValue Ops[2] = {SP, Chain}; - return DAG.getMergeValues(Ops, dl); -} - -SDValue -AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, - SelectionDAG &DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); - - if (Subtarget->isTargetWindows()) - return LowerWindowsDYNAMIC_STACKALLOC(Op, DAG); - else if (hasInlineStackProbe(MF)) - return LowerInlineDYNAMIC_STACKALLOC(Op, DAG); - else - return SDValue(); + return Chain; } // When x and y are extended, lower: @@ -13844,6 +13751,51 @@ SDValue AArch64TargetLowering::LowerAVG(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::ADD, dl, VT, Add, tmp); } +SDValue +AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && + "Only Windows alloca probing supported"); + SDLoc dl(Op); + // Get the inputs. + SDNode *Node = Op.getNode(); + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + MaybeAlign Align = + cast(Op.getOperand(2))->getMaybeAlignValue(); + EVT VT = Node->getValueType(0); + + if (DAG.getMachineFunction().getFunction().hasFnAttribute( + "no-stack-arg-probe")) { + SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64); + Chain = SP.getValue(1); + SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size); + if (Align) + SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0), + DAG.getConstant(-(uint64_t)Align->value(), dl, VT)); + Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP); + SDValue Ops[2] = {SP, Chain}; + return DAG.getMergeValues(Ops, dl); + } + + Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl); + + Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG); + + SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64); + Chain = SP.getValue(1); + SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size); + if (Align) + SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0), + DAG.getConstant(-(uint64_t)Align->value(), dl, VT)); + Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP); + + Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl); + + SDValue Ops[2] = {SP, Chain}; + return DAG.getMergeValues(Ops, dl); +} + SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); @@ -26104,9 +26056,3 @@ bool AArch64TargetLowering::preferScalarizeSplat(SDNode *N) const { } return true; } - -bool AArch64TargetLowering::hasInlineStackProbe( - const MachineFunction &MF) const { - return !Subtarget->isTargetWindows() && - MF.getInfo()->hasStackProbing(); -} \ No newline at end of file diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 9b388c7f8668ec8a052fa5dd04a303710cfec4fb..aca45f113e736679e22961efba1668df7e17ef19 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -90,10 +90,6 @@ enum NodeType : unsigned { ADC, SBC, // adc, sbc instructions - // To avoid stack clash, allocation is performed by block and each block is - // probed. - PROBED_ALLOCA, - // Predicated instructions where inactive lanes produce undefined results. ABDS_PRED, ABDU_PRED, @@ -512,13 +508,6 @@ const unsigned RoundingBitsPos = 22; const ArrayRef getGPRArgRegs(); const ArrayRef getFPRArgRegs(); -/// Maximum allowed number of unprobed bytes above SP at an ABI -/// boundary. -const unsigned StackProbeMaxUnprobedStack = 1024; - -/// Maximum number of iterations to unroll for a constant size probing loop. -const unsigned StackProbeMaxLoopUnroll = 4; - } // namespace AArch64 class AArch64Subtarget; @@ -614,9 +603,6 @@ public: MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, MachineBasicBlock *BB) const; - MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI, - MachineBasicBlock *MBB) const; - MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB) const; @@ -956,9 +942,6 @@ public: // used for 64bit and 128bit vectors as well. bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const; - /// True if stack clash protection is enabled for this functions. - bool hasInlineStackProbe(const MachineFunction &MF) const override; - private: /// Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. @@ -1120,10 +1103,10 @@ private: SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; - + SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain, + SDValue &Size, + SelectionDAG &DAG) const; SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const; SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index b3b42a97e8c9114a3436e89e730dc54c6f8acd4b..0691e07a639beee77f8096e2cdb1f70e259a0e03 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "AArch64InstrInfo.h" -#include "AArch64ExpandImm.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" @@ -19,7 +18,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -8430,94 +8428,6 @@ unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) { return AArch64::BLR; } -MachineBasicBlock::iterator -AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI, - Register TargetReg, bool FrameSetup) const { - assert(TargetReg != AArch64::SP && "New top of stack cannot aleady be in SP"); - - MachineBasicBlock &MBB = *MBBI->getParent(); - MachineFunction &MF = *MBB.getParent(); - const AArch64InstrInfo *TII = - MF.getSubtarget().getInstrInfo(); - int64_t ProbeSize = MF.getInfo()->getStackProbeSize(); - DebugLoc DL = MBB.findDebugLoc(MBBI); - - MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); - MachineBasicBlock *LoopTestMBB = - MF.CreateMachineBasicBlock(MBB.getBasicBlock()); - MF.insert(MBBInsertPoint, LoopTestMBB); - MachineBasicBlock *LoopBodyMBB = - MF.CreateMachineBasicBlock(MBB.getBasicBlock()); - MF.insert(MBBInsertPoint, LoopBodyMBB); - MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock()); - MF.insert(MBBInsertPoint, ExitMBB); - MachineInstr::MIFlag Flags = - FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags; - - // LoopTest: - // SUB SP, SP, #ProbeSize - emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP, - AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags); - - // CMP SP, TargetReg - BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64), - AArch64::XZR) - .addReg(AArch64::SP) - .addReg(TargetReg) - .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0)) - .setMIFlags(Flags); - - // B. LoopExit - BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc)) - .addImm(AArch64CC::LE) - .addMBB(ExitMBB) - .setMIFlags(Flags); - - // STR XZR, [SP] - BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui)) - .addReg(AArch64::XZR) - .addReg(AArch64::SP) - .addImm(0) - .setMIFlags(Flags); - - // B loop - BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B)) - .addMBB(LoopTestMBB) - .setMIFlags(Flags); - - // LoopExit: - // MOV SP, TargetReg - BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP) - .addReg(TargetReg) - .addImm(0) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) - .setMIFlags(Flags); - - // STR XZR, [SP] - BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::STRXui)) - .addReg(AArch64::XZR) - .addReg(AArch64::SP) - .addImm(0) - .setMIFlags(Flags); - - ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); - - LoopTestMBB->addSuccessor(ExitMBB); - LoopTestMBB->addSuccessor(LoopBodyMBB); - LoopBodyMBB->addSuccessor(LoopTestMBB); - MBB.addSuccessor(LoopTestMBB); - - // Update liveins. - if (MF.getRegInfo().reservedRegsFrozen()) { - recomputeLiveIns(*LoopTestMBB); - recomputeLiveIns(*LoopBodyMBB); - recomputeLiveIns(*ExitMBB); - } - - return ExitMBB->begin(); -} - #define GET_INSTRINFO_HELPERS #define GET_INSTRMAP_INFO #include "AArch64GenInstrInfo.inc" diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 7e84b86fc52cd617490d83f20b972b98fb3cd3d0..20210a96d67ad28def588797a694c958c5aaaa79 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -340,12 +340,6 @@ public: static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized); - // Decrement the SP, issuing probes along the way. `TargetReg` is the new top - // of the stack. `FrameSetup` is passed as true, if the allocation is a part - // of constructing the activation frame of a function. - MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, - Register TargetReg, - bool FrameSetup) const; #define GET_INSTRINFO_HELPER_DECLS #include "AArch64GenInstrInfo.inc" diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 3e3dc863dc5f753cca47a51552c9f348bba9804e..27a7e26c5e188a1d0894e006b38f8dc281fb4b76 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -820,12 +820,6 @@ def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>; - -def AArch64probedalloca - : SDNode<"AArch64ISD::PROBED_ALLOCA", - SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, - [SDNPHasChain, SDNPMayStore]>; - def AArch64mrs : SDNode<"AArch64ISD::MRS", SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>, [SDNPHasChain, SDNPOutGlue]>; @@ -888,8 +882,7 @@ include "SMEInstrFormats.td" // Miscellaneous instructions. //===----------------------------------------------------------------------===// -let hasSideEffects = 1, isCodeGenOnly = 1 in { -let Defs = [SP], Uses = [SP] in { +let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in { // We set Sched to empty list because we expect these instructions to simply get // removed in most cases. def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), @@ -898,34 +891,7 @@ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), [(AArch64callseq_end timm:$amt1, timm:$amt2)]>, Sched<[]>; -} - -let Defs = [SP, NZCV], Uses = [SP] in { -// Probed stack allocation of a constant size, used in function prologues when -// stack-clash protection is enabled. -def PROBED_STACKALLOC : Pseudo<(outs GPR64:$scratch), - (ins i64imm:$stacksize, i64imm:$fixed_offset, - i64imm:$scalable_offset), - []>, - Sched<[]>; - -// Probed stack allocation of a variable size, used in function prologues when -// stack-clash protection is enabled. -def PROBED_STACKALLOC_VAR : Pseudo<(outs), - (ins GPR64sp:$target), - []>, - Sched<[]>; - -// Probed stack allocations of a variable size, used for allocas of unknown size -// when stack-clash protection is enabled. -let usesCustomInserter = 1 in -def PROBED_STACKALLOC_DYN : Pseudo<(outs), - (ins GPR64common:$target), - [(AArch64probedalloca GPR64common:$target)]>, - Sched<[]>; - -} // Defs = [SP, NZCV], Uses = [SP] in -} // hasSideEffects = 1, isCodeGenOnly = 1 +} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 let isReMaterializable = 1, isCodeGenOnly = 1 in { // FIXME: The following pseudo instructions are only needed because remat diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp index 0bef3c2d248310d635628549543269aa09d3b52d..961a19317d6660b8a36f6ea0d13aa9cfeade80b2 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp @@ -97,45 +97,14 @@ AArch64FunctionInfo::AArch64FunctionInfo(const Function &F, if (const auto *BTE = mdconst::extract_or_null( F.getParent()->getModuleFlag("branch-target-enforcement"))) BranchTargetEnforcement = BTE->getZExtValue(); - } else { - const StringRef BTIEnable = - F.getFnAttribute("branch-target-enforcement").getValueAsString(); - assert(BTIEnable.equals_insensitive("true") || - BTIEnable.equals_insensitive("false")); - BranchTargetEnforcement = BTIEnable.equals_insensitive("true"); + return; } - // The default stack probe size is 4096 if the function has no - // stack-probe-size attribute. This is a safe default because it is the - // smallest possible guard page size. - uint64_t ProbeSize = 4096; - if (F.hasFnAttribute("stack-probe-size")) - ProbeSize = F.getFnAttributeAsParsedInteger("stack-probe-size"); - else if (const auto *PS = mdconst::extract_or_null( - F.getParent()->getModuleFlag("stack-probe-size"))) - ProbeSize = PS->getZExtValue(); - assert(int64_t(ProbeSize) > 0 && "Invalid stack probe size"); - - if (STI->isTargetWindows()) { - if (!F.hasFnAttribute("no-stack-arg-probe")) - StackProbeSize = ProbeSize; - } else { - // Round down to the stack alignment. - uint64_t StackAlign = - STI->getFrameLowering()->getTransientStackAlign().value(); - ProbeSize = std::max(StackAlign, ProbeSize & ~(StackAlign - 1U)); - StringRef ProbeKind; - if (F.hasFnAttribute("probe-stack")) - ProbeKind = F.getFnAttribute("probe-stack").getValueAsString(); - else if (const auto *PS = dyn_cast_or_null( - F.getParent()->getModuleFlag("probe-stack"))) - ProbeKind = PS->getString(); - if (ProbeKind.size()) { - if (ProbeKind != "inline-asm") - report_fatal_error("Unsupported stack probing method"); - StackProbeSize = ProbeSize; - } - } + const StringRef BTIEnable = + F.getFnAttribute("branch-target-enforcement").getValueAsString(); + assert(BTIEnable.equals_insensitive("true") || + BTIEnable.equals_insensitive("false")); + BranchTargetEnforcement = BTIEnable.equals_insensitive("true"); } MachineFunctionInfo *AArch64FunctionInfo::clone( diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index d50011594eb1ac0a7de1087185c8be0c09293e2c..d82fb436925ec6fa2bd1b7f61bb73b9caba5dc29 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -192,8 +192,6 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// True if the function need asynchronous unwind information. mutable std::optional NeedsAsyncDwarfUnwindInfo; - int64_t StackProbeSize = 0; - public: AArch64FunctionInfo(const Function &F, const AArch64Subtarget *STI); @@ -449,10 +447,6 @@ public: bool needsDwarfUnwindInfo(const MachineFunction &MF) const; bool needsAsyncDwarfUnwindInfo(const MachineFunction &MF) const; - bool hasStackProbing() const { return StackProbeSize != 0; } - - int64_t getStackProbeSize() const { return StackProbeSize; } - private: // Hold the lists of LOHs. MILOHContainer LOHContainerSet; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 0dd2b4d48dd69570dfd6241d2a5c87c19eb03311..d905da4eaec335b87b025763e350a6618bb3a19b 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -797,9 +797,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) return Query.Types[0] == p0 && Query.Types[1] == s64; }); - getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom(); - - getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower(); + getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower(); if (ST.hasMOPS()) { // G_BZERO is not supported. Currently it is only emitted by @@ -993,8 +991,6 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, return legalizeMemOps(MI, Helper); case TargetOpcode::G_FCOPYSIGN: return legalizeFCopySign(MI, Helper); - case TargetOpcode::G_DYN_STACKALLOC: - return legalizeDynStackAlloc(MI, Helper); } llvm_unreachable("expected switch to return"); @@ -1691,42 +1687,3 @@ bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI, MI.eraseFromParent(); return true; } - -bool AArch64LegalizerInfo::legalizeDynStackAlloc( - MachineInstr &MI, LegalizerHelper &Helper) const { - MachineFunction &MF = *MI.getParent()->getParent(); - MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; - MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); - - // If stack probing is not enabled for this function, use the default - // lowering. - if (!MF.getFunction().hasFnAttribute("probe-stack") || - MF.getFunction().getFnAttribute("probe-stack").getValueAsString() != - "inline-asm") { - Helper.lowerDynStackAlloc(MI); - return true; - } - - Register Dst = MI.getOperand(0).getReg(); - Register AllocSize = MI.getOperand(1).getReg(); - Align Alignment = assumeAligned(MI.getOperand(2).getImm()); - - assert(MRI.getType(Dst) == LLT::pointer(0, 64) && - "Unexpected type for dynamic alloca"); - assert(MRI.getType(AllocSize) == LLT::scalar(64) && - "Unexpected type for dynamic alloca"); - - LLT PtrTy = MRI.getType(Dst); - Register SPReg = - Helper.getTargetLowering().getStackPointerRegisterToSaveRestore(); - Register SPTmp = - Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy); - auto NewMI = - MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp}); - MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass); - MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI); - MIRBuilder.buildCopy(Dst, SPTmp); - - MI.eraseFromParent(); - return true; -} \ No newline at end of file diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h index 94484ea59d1559e639cc36ba90e88aa3bd915ce8..c10f6e071ed430c9f4b8cbe73646b4404d07f72a 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -58,7 +58,6 @@ private: bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const; bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const; bool legalizeFCopySign(MachineInstr &MI, LegalizerHelper &Helper) const; - bool legalizeDynStackAlloc(MachineInstr &MI, LegalizerHelper &Helper) const; const AArch64Subtarget *ST; }; } // End llvm namespace. diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/X86LegalizerInfo.cpp index 104461cff0a91b77bdb5f69e43b045e610806a11..a4a247f85f3d727149006ff84bc67153b60c2a95 100644 --- a/llvm/lib/Target/X86/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/X86LegalizerInfo.cpp @@ -528,10 +528,6 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, // memory intrinsics getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall(); - getActionDefinitionsBuilder({G_DYN_STACKALLOC, - G_STACKSAVE, - G_STACKRESTORE}).lower(); - // fp intrinsics getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN) .scalarize(0) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index 575cd6b874e3563152a996d3a11e6f09bd57d1b5..5f3544add39844aaafd33520f4f1e920cfb0366d 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -2392,8 +2392,8 @@ declare ptr @llvm.stacksave() declare void @llvm.stackrestore(ptr) define void @test_stacksaverestore() { ; CHECK-LABEL: name: test_stacksaverestore - ; CHECK: [[SAVE:%[0-9]+]]:_(p0) = G_STACKSAVE - ; CHECK-NEXT: G_STACKRESTORE [[SAVE]] + ; CHECK: [[SAVE:%[0-9]+]]:_(p0) = COPY $sp + ; CHECK-NEXT: $sp = COPY [[SAVE]](p0) ; CHECK-NEXT: RET_ReallyLR %sp = call ptr @llvm.stacksave() call void @llvm.stackrestore(ptr %sp) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir index 82781cebc55a93206c613faa4d977ee209ca57a3..e9188fb89f699a0434bbfdb8af9d4defa0544334 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir @@ -19,21 +19,6 @@ ret i128* %addr } - define i8* @test_simple_alloca_stack_probing(i32 %numelts) "probe-stack"="inline-asm" { - %addr = alloca i8, i32 %numelts - ret i8* %addr - } - - define i8* @test_aligned_alloca_stack_probing(i32 %numelts) "probe-stack"="inline-asm" { - %addr = alloca i8, i32 %numelts, align 32 - ret i8* %addr - } - - define i128* @test_natural_alloca_stack_probing(i32 %numelts) "probe-stack"="inline-asm" { - %addr = alloca i128, i32 %numelts - ret i128* %addr - } - ... --- name: test_simple_alloca @@ -52,23 +37,22 @@ body: | ; CHECK-LABEL: name: test_simple_alloca ; CHECK: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]] - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[SUB]](s64) - ; CHECK-NEXT: $sp = COPY [[INTTOPTR]](p0) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0) - ; CHECK-NEXT: $x0 = COPY [[COPY2]](p0) - ; CHECK-NEXT: RET_ReallyLR implicit $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; CHECK: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]] + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16 + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]] + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp + ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) + ; CHECK: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]] + ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[SUB]](s64) + ; CHECK: $sp = COPY [[INTTOPTR]](p0) + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0) + ; CHECK: $x0 = COPY [[COPY2]](p0) + ; CHECK: RET_ReallyLR implicit $x0 %0:_(s32) = COPY $w0 %3:_(s64) = G_CONSTANT i64 1 %1:_(s64) = G_ZEXT %0(s32) @@ -99,25 +83,24 @@ body: | ; CHECK-LABEL: name: test_aligned_alloca ; CHECK: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -32 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C3]] - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND1]](s64) - ; CHECK-NEXT: $sp = COPY [[INTTOPTR]](p0) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0) - ; CHECK-NEXT: $x0 = COPY [[COPY2]](p0) - ; CHECK-NEXT: RET_ReallyLR implicit $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; CHECK: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]] + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16 + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]] + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp + ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) + ; CHECK: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]] + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -32 + ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C3]] + ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND1]](s64) + ; CHECK: $sp = COPY [[INTTOPTR]](p0) + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0) + ; CHECK: $x0 = COPY [[COPY2]](p0) + ; CHECK: RET_ReallyLR implicit $x0 %0:_(s32) = COPY $w0 %3:_(s64) = G_CONSTANT i64 1 %1:_(s64) = G_ZEXT %0(s32) @@ -148,23 +131,22 @@ body: | ; CHECK-LABEL: name: test_natural_alloca ; CHECK: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]] - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[SUB]](s64) - ; CHECK-NEXT: $sp = COPY [[INTTOPTR]](p0) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0) - ; CHECK-NEXT: $x0 = COPY [[COPY2]](p0) - ; CHECK-NEXT: RET_ReallyLR implicit $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; CHECK: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]] + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16 + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]] + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp + ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) + ; CHECK: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]] + ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[SUB]](s64) + ; CHECK: $sp = COPY [[INTTOPTR]](p0) + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0) + ; CHECK: $x0 = COPY [[COPY2]](p0) + ; CHECK: RET_ReallyLR implicit $x0 %0:_(s32) = COPY $w0 %3:_(s64) = G_CONSTANT i64 16 %1:_(s64) = G_ZEXT %0(s32) @@ -178,139 +160,3 @@ body: | RET_ReallyLR implicit $x0 ... ---- -name: test_simple_alloca_stack_probing -alignment: 4 -tracksRegLiveness: true -liveins: - - { reg: '$w0' } -frameInfo: - maxAlignment: 1 -stack: - - { id: 0, name: addr, type: variable-sized, alignment: 1 } -machineFunctionInfo: {} -body: | - bb.1 (%ir-block.0): - liveins: $w0 - ; CHECK-LABEL: name: test_simple_alloca_stack_probing - ; CHECK: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s64) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[SHL]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]] - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:gpr64common(p0) = G_INTTOPTR [[SUB]](s64) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0) - ; CHECK-NEXT: PROBED_STACKALLOC_DYN [[INTTOPTR]](p0), implicit-def $sp, implicit-def $nzcv, implicit $sp - ; CHECK-NEXT: $x0 = COPY [[COPY2]](p0) - ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %0:_(s32) = COPY $w0 - %1:_(s64) = G_ZEXT %0(s32) - %9:_(s64) = G_CONSTANT i64 0 - %2:_(s64) = G_SHL %1, %9(s64) - %4:_(s64) = G_CONSTANT i64 15 - %5:_(s64) = nuw G_ADD %2, %4 - %6:_(s64) = G_CONSTANT i64 -16 - %7:_(s64) = G_AND %5, %6 - %8:_(p0) = G_DYN_STACKALLOC %7(s64), 1 - $x0 = COPY %8(p0) - RET_ReallyLR implicit $x0 -... ---- -name: test_aligned_alloca_stack_probing -alignment: 4 -tracksRegLiveness: true -liveins: - - { reg: '$w0' } -frameInfo: - maxAlignment: 32 -stack: - - { id: 0, name: addr, type: variable-sized, alignment: 32 } -machineFunctionInfo: {} -body: | - bb.1 (%ir-block.0): - liveins: $w0 - ; CHECK-LABEL: name: test_aligned_alloca_stack_probing - ; CHECK: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s64) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[SHL]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -32 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C3]] - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:gpr64common(p0) = G_INTTOPTR [[AND1]](s64) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0) - ; CHECK-NEXT: PROBED_STACKALLOC_DYN [[INTTOPTR]](p0), implicit-def $sp, implicit-def $nzcv, implicit $sp - ; CHECK-NEXT: $x0 = COPY [[COPY2]](p0) - ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %0:_(s32) = COPY $w0 - %1:_(s64) = G_ZEXT %0(s32) - %9:_(s64) = G_CONSTANT i64 0 - %2:_(s64) = G_SHL %1, %9(s64) - %4:_(s64) = G_CONSTANT i64 15 - %5:_(s64) = nuw G_ADD %2, %4 - %6:_(s64) = G_CONSTANT i64 -16 - %7:_(s64) = G_AND %5, %6 - %8:_(p0) = G_DYN_STACKALLOC %7(s64), 32 - $x0 = COPY %8(p0) - RET_ReallyLR implicit $x0 -... ---- -name: test_natural_alloca_stack_probing -alignment: 4 -tracksRegLiveness: true -liveins: - - { reg: '$w0' } -frameInfo: - maxAlignment: 1 -stack: - - { id: 0, name: addr, type: variable-sized, alignment: 1 } -machineFunctionInfo: {} -body: | - bb.1 (%ir-block.0): - liveins: $w0 - ; CHECK-LABEL: name: test_natural_alloca_stack_probing - ; CHECK: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s64) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[SHL]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]] - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:gpr64common(p0) = G_INTTOPTR [[SUB]](s64) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0) - ; CHECK-NEXT: PROBED_STACKALLOC_DYN [[INTTOPTR]](p0), implicit-def $sp, implicit-def $nzcv, implicit $sp - ; CHECK-NEXT: $x0 = COPY [[COPY2]](p0) - ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %0:_(s32) = COPY $w0 - %1:_(s64) = G_ZEXT %0(s32) - %9:_(s64) = G_CONSTANT i64 4 - %2:_(s64) = G_SHL %1, %9(s64) - %4:_(s64) = G_CONSTANT i64 15 - %5:_(s64) = nuw G_ADD %2, %4 - %6:_(s64) = G_CONSTANT i64 -16 - %7:_(s64) = G_AND %5, %6 - %8:_(p0) = G_DYN_STACKALLOC %7(s64), 1 - $x0 = COPY %8(p0) - RET_ReallyLR implicit $x0 \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index efae9b66b53dee601b53a58b59528160606e4e58..b4fe73d29fa650873b9820cbe94a1b962c02a32f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -641,22 +641,7 @@ # DEBUG-NEXT: G_JUMP_TABLE (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK -# DEBUG-NEXT: G_DYN_STACKALLOC (opcode [[DYN_STACKALLOC:[0-9]+]]): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: G_STACKSAVE (opcode {{[0-9]+}}): 1 type index, 0 imm indices -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to [[DYN_STACKALLOC]] -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: G_STACKRESTORE (opcode {{[0-9]+}}): 1 type index, 0 imm indices -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to [[DYN_STACKALLOC]] -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: G_STACKSAVE (opcode [[STACKSAVE:[0-9]+]]): 1 type index, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: G_STACKRESTORE (opcode {{[0-9]+}}): 1 type index, 0 imm indices -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to [[STACKSAVE]] +# DEBUG-NEXT: G_DYN_STACKALLOC (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_STRICT_FADD (opcode {{[0-9]+}}): 1 type index, 0 imm indices diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll b/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll deleted file mode 100644 index 97ecca0bd77b09feb28eba293232a58688036ee4..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll +++ /dev/null @@ -1,39 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -global-isel=1 -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s - -declare void @use_addr(ptr) -declare ptr @llvm.stacksave.p0() -declare void @llvm.stackrestore.p0(ptr) - -define void @test_scoped_alloca(i64 %n) { -; CHECK-LABEL: test_scoped_alloca: -; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 32 -; CHECK-NEXT: .cfi_offset w19, -16 -; CHECK-NEXT: .cfi_offset w30, -24 -; CHECK-NEXT: .cfi_offset w29, -32 -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: bl llvm.stacksave.p0 -; CHECK-NEXT: add x9, x19, #15 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: sub x8, x8, x9 -; CHECK-NEXT: mov sp, x8 -; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: bl use_addr -; CHECK-NEXT: mov x0, x19 -; CHECK-NEXT: bl llvm.stackrestore.p0 -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload -; CHECK-NEXT: ret - %sp = call ptr @llvm.stacksave.p0() - %addr = alloca i8, i64 %n - call void @use_addr(ptr %addr) - call void @llvm.stackrestore.p0(ptr %sp) - ret void -} diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir index 265c474fbc5db48b94b24731711fdc824ec925c6..623c0f240be4fd7794123055f059cf8310f86331 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir @@ -4,8 +4,8 @@ name: hasBasepointer # CHECK-LABEL: name: hasBasepointer # CHECK: bb.0: -# CHECK: $sp = frame-setup SUBXri $sp, 16, 0 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 +# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1 +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK-NEXT: $x19 = ADDXri $sp, 0, 0 # CHECK: STRXui $x0, $x19, 0 tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir index 35fd7ca77d5cf3efc35a2ad1a2457856c36898bc..e367a380f8ba9f07f3b4a99768c80134e553a136 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir @@ -7,9 +7,9 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: mov x29, sp - ; CHECK-NEXT: sub sp, sp, #2064 ; CHECK-NEXT: addvl sp, sp, #-32 ; CHECK-NEXT: addvl sp, sp, #-28 + ; CHECK-NEXT: sub sp, sp, #2064 ; CHECK-NEXT: ldr x8, [sp, #2048] ; CHECK-NEXT: addvl sp, sp, #31 ; CHECK-NEXT: addvl sp, sp, #29 diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir index 680f9c335c250c54aed55bd8e14df87bedb02599..d54f67634d02a7b97d173159ba582f519dc00b38 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir @@ -4,9 +4,9 @@ name: LateScavengingSlot # CHECK-LABEL: name: LateScavengingSlot # CHECK: bb.0: -# CHECK: $sp = frame-setup SUBXri $sp, 8, 12 +# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1 +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 8, 12 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $sp, 0 # CHECK-NEXT: $[[SCRATCH]] = ADDVL_XXI $fp, -1 # CHECK-NEXT: STRXui $x0, killed $[[SCRATCH]], 0 diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir index 8b657c95bfc7c84c388d3802d121069e4e7a2329..7c87587c6dc4e2c6b2d127b45d43e6d4b47ce49f 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir @@ -60,10 +60,10 @@ # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 +# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK-NEXT: CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 32 @@ -77,7 +77,7 @@ # ASM-LABEL: test_allocate_sve: # ASM: .cfi_def_cfa_offset 16 # ASM-NEXT: .cfi_offset w29, -16 -# ASM: .cfi_def_cfa_offset 32 +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG # ASM: .cfi_def_cfa wsp, 32 # ASM: .cfi_def_cfa_offset 16 @@ -87,7 +87,7 @@ # # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 -# UNWINDINFO: DW_CFA_def_cfa_offset: +32 +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa: reg31 +32 # UNWINDINFO: DW_CFA_def_cfa_offset: +16 @@ -125,9 +125,9 @@ body: | # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w20, -8 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w21, -16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 48 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 +# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 # # CHECK-NEXT: $x20 = IMPLICIT_DEF @@ -149,7 +149,7 @@ body: | # ASM: .cfi_offset w20, -8 # ASM-NEXT: .cfi_offset w21, -16 # ASM-NEXT: .cfi_offset w29, -32 -# ASM: .cfi_def_cfa_offset 48 +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 16 * VG # # ASM: .cfi_def_cfa wsp, 48 @@ -164,7 +164,7 @@ body: | # UNWINDINFO: DW_CFA_offset: reg20 -8 # UNWINDINFO-NEXT: DW_CFA_offset: reg21 -16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32 -# UNWINDINFO: DW_CFA_def_cfa_offset: +48 +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # # UNWINDINFO: DW_CFA_def_cfa: reg31 +48 @@ -205,9 +205,9 @@ body: | # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 # CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0 -# CHECK-NEXT: $[[TMP]] = frame-setup ADDVL_XXI $[[TMP]], -2 -# CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]] +# CHECK-NEXT: $sp = ANDXri killed $[[TMP]] # CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16 # CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 @@ -267,9 +267,9 @@ body: | # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 +# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16 @@ -292,7 +292,7 @@ body: | # ASM-LABEL: test_address_sve: # ASM: .cfi_def_cfa_offset 16 # ASM-NEXT: .cfi_offset w29, -16 -# ASM: .cfi_def_cfa_offset 32 +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 24 * VG # # ASM: .cfi_def_cfa wsp, 32 @@ -302,7 +302,7 @@ body: | # # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 -# UNWINDINFO: DW_CFA_def_cfa_offset: +32 +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # # UNWINDINFO: DW_CFA_def_cfa: reg31 +32 @@ -353,8 +353,8 @@ body: | # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK-NEXT: STR_ZXI $z0, $fp, -1 # CHECK-NEXT: STR_ZXI $z1, $fp, -2 @@ -429,9 +429,9 @@ body: | # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 +# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK: $[[TMP:x[0-9]+]] = ADDVL_XXI $sp, 1 # CHECK-NEXT: $x0 = LDRXui killed $[[TMP]], 4 @@ -448,7 +448,7 @@ body: | # ASM-LABEL: test_stack_arg_sve: # ASM: .cfi_def_cfa_offset 16 # ASM-NEXT: .cfi_offset w29, -16 -# ASM: .cfi_def_cfa_offset 32 +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG # # ASM: .cfi_def_cfa wsp, 32 @@ -458,7 +458,7 @@ body: | # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 -# UNWINDINFO: DW_CFA_def_cfa_offset: +32 +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # # UNWINDINFO: DW_CFA_def_cfa: reg31 +32 @@ -640,8 +640,8 @@ body: | # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w19, -16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -24 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK-NEXT: $x19 = ADDXri $sp, 0, 0 # CHECK-NEXT: STRXui $xzr, $x19, 0 # CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0 @@ -863,9 +863,9 @@ body: | # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 -# CHECK: $sp = frame-setup SUBXri $sp, 32, 0 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK: $sp = frame-setup ADDVL_XXI $sp, -1 +# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 +# CHECK: $sp = frame-setup SUBXri $sp, 32, 0 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK: $sp = frame-destroy ADDXri $sp, 32, 0 @@ -916,7 +916,7 @@ body: | # ASM-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG # ASM-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG # ASM-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG -# ASM: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 144 * VG +# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 152 * VG # ASM: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 152 * VG # # ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 152 * VG @@ -950,7 +950,7 @@ body: | # UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -48, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -56, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -64, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +144, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus @@ -1031,9 +1031,9 @@ body: | # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0 -# CHECK-NEXT: $[[TMP]] = frame-setup ADDVL_XXI $[[TMP]], -1 -# CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]] +# CHECK-NEXT: $sp = ANDXri killed $[[TMP]] # CHECK: $sp = frame-destroy ADDVL_XXI $fp, -18 # CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 4 diff --git a/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir b/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir index f6fc627ac2d3d87368af69375b8d5ee5fa070409..1b9411d07f433ab68de88285a02a83156c383db6 100644 --- a/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir +++ b/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir @@ -21,7 +21,7 @@ stack: - { id: 1, size: 4, alignment: 4, local-offset: -68 } # CHECK: body: -# CHECK: $sp = frame-setup ANDXri killed ${{x[0-9]+}}, 7865 +# CHECK: $sp = ANDXri killed ${{x[0-9]+}}, 7865 # CHECK: STRSui $s0, $sp, 0 # CHECK: STRSui $s0, $fp, 7 body: | diff --git a/llvm/test/CodeGen/AArch64/stack-guard-sve.ll b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll index 5acbb22bf1ab5a4978caa5f4a8e5e36e320ad0c6..1672a7eb8739779123248bdc9c661241293b80c3 100644 --- a/llvm/test/CodeGen/AArch64/stack-guard-sve.ll +++ b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll @@ -148,9 +148,9 @@ entry: ; CHECK-LABEL: local_stack_alloc: ; CHECK: mov x29, sp +; CHECK: addvl sp, sp, #-2 ; CHECK: sub sp, sp, #16, lsl #12 ; CHECK: sub sp, sp, #16 -; CHECK: addvl sp, sp, #-2 ; Stack guard is placed below the SVE stack area (and above all fixed-width objects) ; CHECK-DAG: add [[STACK_GUARD_SPILL_PART_LOC:x[0-9]+]], sp, #8, lsl #12 @@ -198,9 +198,9 @@ entry: ; CHECK-LABEL: local_stack_alloc_strong: ; CHECK: mov x29, sp +; CHECK: addvl sp, sp, #-3 ; CHECK: sub sp, sp, #16, lsl #12 ; CHECK: sub sp, sp, #16 -; CHECK: addvl sp, sp, #-3 ; Stack guard is placed at the top of the SVE stack area ; CHECK-DAG: ldr [[STACK_GUARD:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard] diff --git a/llvm/test/CodeGen/AArch64/stack-probing-64k.ll b/llvm/test/CodeGen/AArch64/stack-probing-64k.ll deleted file mode 100644 index 0a3198fc520e99a143e827b4285c986797d90051..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/AArch64/stack-probing-64k.ll +++ /dev/null @@ -1,392 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs -enable-post-misched=false | FileCheck %s -; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs -enable-post-misched=false -global-isel | FileCheck %s - -; Tests for prolog sequences for stack probing, when using a 64KiB stack guard. - -; 64k bytes is the largest frame we can probe in one go. -define void @static_65536(ptr %out) #0 { -; CHECK-LABEL: static_65536: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: .cfi_def_cfa_offset 65552 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 65536, align 1 - store i8* %v, ptr %out, align 8 - ret void -} - -; 64k+16 bytes, still needs just one probe. -define void @static_65552(ptr %out) #0 { -; CHECK-LABEL: static_65552: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: .cfi_def_cfa_offset 65552 -; CHECK-NEXT: str xzr, [sp], #-16 -; CHECK-NEXT: .cfi_def_cfa_offset 65568 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 65552, align 1 - store i8* %v, ptr %out, align 8 - ret void -} - -; 64k+1024 bytes, the largest frame which needs just one probe. -define void @static_66560(ptr %out) #0 { -; CHECK-LABEL: static_66560: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: .cfi_def_cfa_offset 65552 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #1024 -; CHECK-NEXT: .cfi_def_cfa_offset 66576 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: .cfi_def_cfa_offset 1040 -; CHECK-NEXT: add sp, sp, #1024 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 66560, align 1 - store i8* %v, ptr %out, align 8 - ret void -} - -; 64k+1024+16 bytes, the smallest frame which needs two probes. -define void @static_66576(ptr %out) #0 { -; CHECK-LABEL: static_66576: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: .cfi_def_cfa_offset 65552 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #1040 -; CHECK-NEXT: .cfi_def_cfa_offset 66592 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: .cfi_def_cfa_offset 1056 -; CHECK-NEXT: add sp, sp, #1040 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 66576, align 1 - store i8* %v, ptr %out, align 8 - ret void -} - -; 2*64k+1024, the largest frame needing two probes. -define void @static_132096(ptr %out) #0 { -; CHECK-LABEL: static_132096: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: .cfi_def_cfa_offset 65552 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: .cfi_def_cfa_offset 131088 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #1024 -; CHECK-NEXT: .cfi_def_cfa_offset 132112 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #32, lsl #12 // =131072 -; CHECK-NEXT: .cfi_def_cfa_offset 1040 -; CHECK-NEXT: add sp, sp, #1024 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 132096, align 1 - store i8* %v, ptr %out, align 8 - ret void -} - -; 5*64k-16, the largest frame probed without a loop. -define void @static_327664(ptr %out) #0 { -; CHECK-LABEL: static_327664: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: .cfi_def_cfa_offset 65552 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: .cfi_def_cfa_offset 131088 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: .cfi_def_cfa_offset 196624 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: .cfi_def_cfa_offset 262160 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #15, lsl #12 // =61440 -; CHECK-NEXT: .cfi_def_cfa_offset 323600 -; CHECK-NEXT: sub sp, sp, #4080 -; CHECK-NEXT: .cfi_def_cfa_offset 327680 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #79, lsl #12 // =323584 -; CHECK-NEXT: .cfi_def_cfa_offset 4096 -; CHECK-NEXT: add sp, sp, #4080 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 327664, align 1 - store i8* %v, ptr %out, align 8 - ret void -} - -; 5*64k, smallest frame probed with a loop. -define void @static_327680(ptr %out) #0 { -; CHECK-LABEL: static_327680: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub x9, sp, #80, lsl #12 // =327680 -; CHECK-NEXT: .cfi_def_cfa w9, 327696 -; CHECK-NEXT: .LBB6_1: // %entry -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: cmp sp, x9 -; CHECK-NEXT: b.ne .LBB6_1 -; CHECK-NEXT: // %bb.2: // %entry -; CHECK-NEXT: .cfi_def_cfa_register wsp -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 327680, align 1 - store i8* %v, ptr %out, align 8 - ret void -} - -; 5*64k+1024, large enough to use a loop, but not a multiple of 64KiB -; so has a reminder, but no extra probe. -define void @static_328704(ptr %out) #0 { -; CHECK-LABEL: static_328704: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub x9, sp, #80, lsl #12 // =327680 -; CHECK-NEXT: .cfi_def_cfa w9, 327696 -; CHECK-NEXT: .LBB7_1: // %entry -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: cmp sp, x9 -; CHECK-NEXT: b.ne .LBB7_1 -; CHECK-NEXT: // %bb.2: // %entry -; CHECK-NEXT: .cfi_def_cfa_register wsp -; CHECK-NEXT: sub sp, sp, #1024 -; CHECK-NEXT: .cfi_def_cfa_offset 328720 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680 -; CHECK-NEXT: .cfi_def_cfa_offset 1040 -; CHECK-NEXT: add sp, sp, #1024 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 328704, align 1 - store i8* %v, ptr %out, align 8 - ret void -} - -; 5*64k+1040, large enough to use a loop, has a reminder and -; an extra probe. -define void @static_328720(ptr %out) #0 { -; CHECK-LABEL: static_328720: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub x9, sp, #80, lsl #12 // =327680 -; CHECK-NEXT: .cfi_def_cfa w9, 327696 -; CHECK-NEXT: .LBB8_1: // %entry -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: cmp sp, x9 -; CHECK-NEXT: b.ne .LBB8_1 -; CHECK-NEXT: // %bb.2: // %entry -; CHECK-NEXT: .cfi_def_cfa_register wsp -; CHECK-NEXT: sub sp, sp, #1040 -; CHECK-NEXT: .cfi_def_cfa_offset 328736 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680 -; CHECK-NEXT: .cfi_def_cfa_offset 1056 -; CHECK-NEXT: add sp, sp, #1040 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 328720, align 1 - store i8* %v, ptr %out, align 8 - ret void -} - -; A small allocation, but with a very large alignment requirement. We do this -; by moving SP far enough that a sufficiently-aligned block will exist -; somewhere in the stack frame, so must probe the whole of that larger SP move. -define void @static_16_align_131072(ptr %out) #0 { -; CHECK-LABEL: static_16_align_131072: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub x9, sp, #31, lsl #12 // =126976 -; CHECK-NEXT: sub x9, x9, #4080 -; CHECK-NEXT: and x9, x9, #0xfffffffffffe0000 -; CHECK-NEXT: .LBB9_1: // %entry -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: cmp sp, x9 -; CHECK-NEXT: b.le .LBB9_3 -; CHECK-NEXT: // %bb.2: // %entry -; CHECK-NEXT: // in Loop: Header=BB9_1 Depth=1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: b .LBB9_1 -; CHECK-NEXT: .LBB9_3: // %entry -; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 16, align 131072 - store i8* %v, ptr %out, align 8 - ret void -} - -; A small allocation, but with a very large alignment requirement which -; is nevertheless small enough as to not need a loop. -define void @static_16_align_8192(ptr %out) #0 { -; CHECK-LABEL: static_16_align_8192: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub x9, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: sub x9, x9, #4080 -; CHECK-NEXT: and sp, x9, #0xffffffffffffe000 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 16, align 8192 - store i8* %v, ptr %out, align 8 - ret void -} - -; A large allocation with a very large alignment requirement which -; is nevertheless small enough as to not need a loop. -define void @static_32752_align_32k(ptr %out) #0 { -; CHECK-LABEL: static_32752_align_32k: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub x9, sp, #7, lsl #12 // =28672 -; CHECK-NEXT: sub x9, x9, #4080 -; CHECK-NEXT: and sp, x9, #0xffffffffffff8000 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 32752, align 32768 - store i8* %v, ptr %out, align 8 - ret void -} - -attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "stack-probe-size"="65536" "frame-pointer"="none" } \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic-no-frame-setup.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic-no-frame-setup.ll deleted file mode 100644 index 673f9038a35fe6afcc1b0c5247b92b4be9afc344..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic-no-frame-setup.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: llc --stop-after=finalize-isel -o - | FileCheck %s -target triple = "aarch64-linux" - -; Check dynamic stack allocation and probing instructions do not have -; the FrameSetup flag. - -; CHECK-NOT: frame-setup -define void @no_frame_setup(i64 %size, ptr %out) #0 { - %v = alloca i8, i64 %size, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" } \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll deleted file mode 100644 index 3cbcf7749b2ae977346da9e196f26081c33472d5..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll +++ /dev/null @@ -1,365 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs | FileCheck %s -; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs -global-isel -global-isel-abort=2 | FileCheck %s - -; Dynamically-sized allocation, needs a loop which can handle any size at -; runtime. The final iteration of the loop will temporarily put SP below the -; target address, but this doesn't break any of the ABI constraints on the -; stack, and also doesn't probe below the target SP value. -define void @dynamic(i64 %size, ptr %out) #0 { -; CHECK-LABEL: dynamic: -; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: add x9, x0, #15 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 -; CHECK-NEXT: sub x8, x8, x9 -; CHECK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: cmp sp, x8 -; CHECK-NEXT: b.le .LBB0_3 -; CHECK-NEXT: // %bb.2: // in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: b .LBB0_1 -; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: mov sp, x8 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: str x8, [x1] -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret - %v = alloca i8, i64 %size, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -; This function has a fixed-size stack slot and a dynamic one. The fixed size -; slot isn't large enough that we would normally probe it, but we need to do so -; here otherwise the gap between the CSR save and the first probe of the -; dynamic allocation could be too far apart when the size of the dynamic -; allocation is close to the guard size. -define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 { -; CHECK-LABEL: dynamic_fixed: -; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: str xzr, [sp, #-64]! -; CHECK-NEXT: add x9, x0, #15 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 -; CHECK-NEXT: sub x10, x29, #64 -; CHECK-NEXT: sub x8, x8, x9 -; CHECK-NEXT: str x10, [x1] -; CHECK-NEXT: .LBB1_1: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: cmp sp, x8 -; CHECK-NEXT: b.le .LBB1_3 -; CHECK-NEXT: // %bb.2: // in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: b .LBB1_1 -; CHECK-NEXT: .LBB1_3: -; CHECK-NEXT: mov sp, x8 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: str x8, [x2] -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret - %v1 = alloca i8, i64 64, align 1 - store ptr %v1, ptr %out1, align 8 - %v2 = alloca i8, i64 %size, align 1 - store ptr %v2, ptr %out2, align 8 - ret void -} - -; Dynamic allocation, with an alignment requirement greater than the alignment -; of SP. Done by ANDing the target SP with a constant to align it down, then -; doing the loop as normal. Note that we also re-align the stack in the prolog, -; which isn't actually needed because the only aligned allocations are dynamic, -; this is done even without stack probing. -define void @dynamic_align_64(i64 %size, ptr %out) #0 { -; CHECK-LABEL: dynamic_align_64: -; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 32 -; CHECK-NEXT: .cfi_offset w19, -16 -; CHECK-NEXT: .cfi_offset w30, -24 -; CHECK-NEXT: .cfi_offset w29, -32 -; CHECK-NEXT: sub x9, sp, #32 -; CHECK-NEXT: and sp, x9, #0xffffffffffffffc0 -; CHECK-NEXT: add x9, x0, #15 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub x8, x8, x9 -; CHECK-NEXT: mov x19, sp -; CHECK-NEXT: and x8, x8, #0xffffffffffffffc0 -; CHECK-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: cmp sp, x8 -; CHECK-NEXT: b.le .LBB2_3 -; CHECK-NEXT: // %bb.2: // in Loop: Header=BB2_1 Depth=1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: b .LBB2_1 -; CHECK-NEXT: .LBB2_3: -; CHECK-NEXT: mov sp, x8 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: str x8, [x1] -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: .cfi_def_cfa wsp, 32 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w19 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret - %v = alloca i8, i64 %size, align 64 - store ptr %v, ptr %out, align 8 - ret void -} - -; Dynamic allocation, with an alignment greater than the stack guard size. The -; only difference to the dynamic allocation is the constant used for aligning -; the target SP, the loop will probe the whole allocation without needing to -; know about the alignment padding. -define void @dynamic_align_8192(i64 %size, ptr %out) #0 { -; CHECK-LABEL: dynamic_align_8192: -; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 32 -; CHECK-NEXT: .cfi_offset w19, -16 -; CHECK-NEXT: .cfi_offset w30, -24 -; CHECK-NEXT: .cfi_offset w29, -32 -; CHECK-NEXT: sub x9, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: sub x9, x9, #4064 -; CHECK-NEXT: and x9, x9, #0xffffffffffffe000 -; CHECK-NEXT: .LBB3_1: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: cmp sp, x9 -; CHECK-NEXT: b.le .LBB3_3 -; CHECK-NEXT: // %bb.2: // in Loop: Header=BB3_1 Depth=1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: b .LBB3_1 -; CHECK-NEXT: .LBB3_3: -; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: add x9, x0, #15 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub x8, x8, x9 -; CHECK-NEXT: mov x19, sp -; CHECK-NEXT: and x8, x8, #0xffffffffffffe000 -; CHECK-NEXT: .LBB3_4: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: cmp sp, x8 -; CHECK-NEXT: b.le .LBB3_6 -; CHECK-NEXT: // %bb.5: // in Loop: Header=BB3_4 Depth=1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: b .LBB3_4 -; CHECK-NEXT: .LBB3_6: -; CHECK-NEXT: mov sp, x8 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: str x8, [x1] -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: .cfi_def_cfa wsp, 32 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w19 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret - %v = alloca i8, i64 %size, align 8192 - store ptr %v, ptr %out, align 8 - ret void -} - -; For 64k guard pages, the only difference is the constant subtracted from SP -; in the loop. -define void @dynamic_64k_guard(i64 %size, ptr %out) #0 "stack-probe-size"="65536" { -; CHECK-LABEL: dynamic_64k_guard: -; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: add x9, x0, #15 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 -; CHECK-NEXT: sub x8, x8, x9 -; CHECK-NEXT: .LBB4_1: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: cmp sp, x8 -; CHECK-NEXT: b.le .LBB4_3 -; CHECK-NEXT: // %bb.2: // in Loop: Header=BB4_1 Depth=1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: b .LBB4_1 -; CHECK-NEXT: .LBB4_3: -; CHECK-NEXT: mov sp, x8 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: str x8, [x1] -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret - %v = alloca i8, i64 %size, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -; If a function has variable-sized stack objects, then any function calls which -; need to pass arguments on the stack must allocate the stack space for them -; dynamically, to ensure they are at the bottom of the frame. We need to probe -; that space when it is larger than the unprobed space allowed by the ABI (1024 -; bytes), so this needs a very large number of arguments. -define void @no_reserved_call_frame(i64 %n) #0 { -; CHECK-LABEL: no_reserved_call_frame: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: lsl x9, x0, #2 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: add x9, x9, #15 -; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 -; CHECK-NEXT: sub x0, x8, x9 -; CHECK-NEXT: .LBB5_1: // %entry -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: cmp sp, x0 -; CHECK-NEXT: b.le .LBB5_3 -; CHECK-NEXT: // %bb.2: // %entry -; CHECK-NEXT: // in Loop: Header=BB5_1 Depth=1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: b .LBB5_1 -; CHECK-NEXT: .LBB5_3: // %entry -; CHECK-NEXT: mov sp, x0 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #1104 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #1104 -; CHECK-NEXT: bl callee_stack_args -; CHECK-NEXT: add sp, sp, #1104 -; CHECK-NEXT: add sp, sp, #1104 -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i32, i64 %n - call void @callee_stack_args(ptr %v, [138 x i64] undef) - ret void -} - -; Same as above but without a variable-sized allocation, so the reserved call -; frame can be folded into the fixed-size allocation in the prologue. -define void @reserved_call_frame(i64 %n) #0 { -; CHECK-LABEL: reserved_call_frame: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: str x28, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 32 -; CHECK-NEXT: .cfi_offset w28, -16 -; CHECK-NEXT: .cfi_offset w30, -24 -; CHECK-NEXT: .cfi_offset w29, -32 -; CHECK-NEXT: sub sp, sp, #1504 -; CHECK-NEXT: add x0, sp, #1104 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: bl callee_stack_args -; CHECK-NEXT: add sp, sp, #1504 -; CHECK-NEXT: .cfi_def_cfa wsp, 32 -; CHECK-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w28 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i32, i64 100 - call void @callee_stack_args(ptr %v, [138 x i64] undef) - ret void -} - -declare void @callee_stack_args(ptr, [138 x i64]) - -; Dynamic allocation of SVE vectors -define void @dynamic_sve(i64 %size, ptr %out) #0 "target-features"="+sve" { -; CHECK-LABEL: dynamic_sve: -; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 32 -; CHECK-NEXT: .cfi_offset w19, -16 -; CHECK-NEXT: .cfi_offset w30, -24 -; CHECK-NEXT: .cfi_offset w29, -32 -; CHECK-NEXT: rdvl x9, #1 -; CHECK-NEXT: mov x10, #15 // =0xf -; CHECK-NEXT: madd x9, x0, x9, x10 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 -; CHECK-NEXT: sub x8, x8, x9 -; CHECK-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: cmp sp, x8 -; CHECK-NEXT: b.le .LBB7_3 -; CHECK-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: b .LBB7_1 -; CHECK-NEXT: .LBB7_3: -; CHECK-NEXT: mov sp, x8 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: str x8, [x1] -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: .cfi_def_cfa wsp, 32 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w19 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret - %v = alloca , i64 %size, align 16 - store ptr %v, ptr %out, align 8 - ret void -} - -attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" } \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/stack-probing-last-in-block.mir b/llvm/test/CodeGen/AArch64/stack-probing-last-in-block.mir deleted file mode 100644 index 9a173be5857ed73bb3efd709008160b7c3243885..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/AArch64/stack-probing-last-in-block.mir +++ /dev/null @@ -1,144 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -# RUN: llc -run-pass=prologepilog %s -o - | FileCheck %s -# Regression test for a crash when the probing instruction -# to replace is last in the block. ---- | - source_filename = "tt.ll" - target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" - target triple = "aarch64-linux" - - declare i1 @g(ptr) - - define void @f(ptr %out) #0 { - entry: - %p = alloca i32, i32 50000, align 4 - br label %loop - - loop: ; preds = %loop, %entry - %c = call i1 @g(ptr %p) - br i1 %c, label %loop, label %exit - - exit: ; preds = %loop - ret void - } - - attributes #0 = { uwtable "frame-pointer"="none" "probe-stack"="inline-asm" "target-features"="+sve" } - -... ---- -name: f -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -callsEHReturn: false -callsUnwindInit: false -hasEHCatchret: false -hasEHScopes: false -hasEHFunclets: false -isOutlined: false -debugInstrRef: false -failsVerification: false -tracksDebugUserValues: true -registers: [] -liveins: [] -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: true - hasCalls: true - stackProtector: '' - functionContext: '' - maxCallFrameSize: 0 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - localFrameSize: 200000 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: - - { id: 0, name: p, type: default, offset: 0, size: 200000, alignment: 4, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - local-offset: -200000, debug-info-variable: '', debug-info-expression: '', - debug-info-location: '' } -entry_values: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: {} -body: | - ; CHECK-LABEL: name: f - ; CHECK: bb.0.entry: - ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: liveins: $lr, $fp - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1) - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 - ; CHECK-NEXT: $x9 = frame-setup SUBXri $sp, 48, 12 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w9, 196624 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.3.entry: - ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) - ; CHECK-NEXT: liveins: $x9 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1, 12 - ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0 - ; CHECK-NEXT: $xzr = frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv - ; CHECK-NEXT: frame-setup Bcc 1, %bb.3, implicit $nzcv - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4.entry: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $wsp - ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 3392, 0 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 200016 - ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1.loop: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $x0 = ADDXri $sp, 0, 0 - ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $w0 - ; CHECK-NEXT: TBNZW killed renamable $w0, 0, %bb.1 - ; CHECK-NEXT: B %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2.exit: - ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 48, 12 - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 3408 - ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 3392, 0 - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16 - ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30 - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29 - ; CHECK-NEXT: RET_ReallyLR - bb.0.entry: - successors: %bb.1(0x80000000) - - - bb.1.loop: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) - - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - $x0 = ADDXri %stack.0.p, 0, 0 - BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - TBNZW killed renamable $w0, 0, %bb.1 - B %bb.2 - - bb.2.exit: - RET_ReallyLR \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/stack-probing-sve.ll b/llvm/test/CodeGen/AArch64/stack-probing-sve.ll deleted file mode 100644 index e765d071e722041a80fbe72955d5eb5c4728a815..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/AArch64/stack-probing-sve.ll +++ /dev/null @@ -1,724 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs | FileCheck %s -; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs -global-isel -global-isel-abort=2 | FileCheck %s - -; Test prolog sequences for stack probing when SVE objects are involved. - -; The space for SVE objects needs probing in the general case, because -; the stack adjustment may happen to be too big (i.e. greater than the -; probe size) to allocate with a single `addvl`. -; When we do know that the stack adjustment cannot exceed the probe size -; we can avoid emitting a probe loop and emit a simple `addvl; str` -; sequence instead. - -define void @sve_1_vector(ptr %out) #0 { -; CHECK-LABEL: sve_1_vector: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %vec = alloca , align 16 - ret void -} - -; As above, but with 4 SVE vectors of stack space. -define void @sve_4_vector(ptr %out) #0 { -; CHECK-LABEL: sve_4_vector: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG -; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %vec1 = alloca , align 16 - %vec2 = alloca , align 16 - %vec3 = alloca , align 16 - %vec4 = alloca , align 16 - ret void -} - -; As above, but with 16 SVE vectors of stack space. -; The stack adjustment is less than or equal to 16 x 256 = 4096, so -; we can allocate the locals at once. -define void @sve_16_vector(ptr %out) #0 { -; CHECK-LABEL: sve_16_vector: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: addvl sp, sp, #-16 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 128 * VG -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: addvl sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %vec1 = alloca , align 16 - %vec2 = alloca , align 16 - %vec3 = alloca , align 16 - %vec4 = alloca , align 16 - %vec5 = alloca , align 16 - %vec6 = alloca , align 16 - %vec7 = alloca , align 16 - %vec8 = alloca , align 16 - %vec9 = alloca , align 16 - %vec10 = alloca , align 16 - %vec11 = alloca , align 16 - %vec12 = alloca , align 16 - %vec13 = alloca , align 16 - %vec14 = alloca , align 16 - %vec15 = alloca , align 16 - %vec16 = alloca , align 16 - ret void -} - -; As above, but with 17 SVE vectors of stack space. Now we need -; a probing loops since stack adjustment may be greater than -; the probe size (17 x 256 = 4354 bytes) -; TODO: Allocating `k*16+r` SVE vectors can be unrolled into -; emiting the `k + r` sequences of `addvl sp, sp, #-N; str xzr, [sp]` -define void @sve_17_vector(ptr %out) #0 { -; CHECK-LABEL: sve_17_vector: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: addvl x9, sp, #-17 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 136 * VG -; CHECK-NEXT: .LBB3_1: // %entry -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: cmp sp, x9 -; CHECK-NEXT: b.le .LBB3_3 -; CHECK-NEXT: // %bb.2: // %entry -; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: b .LBB3_1 -; CHECK-NEXT: .LBB3_3: // %entry -; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: .cfi_def_cfa_register wsp -; CHECK-NEXT: addvl sp, sp, #17 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %vec1 = alloca , align 16 - %vec2 = alloca , align 16 - %vec3 = alloca , align 16 - %vec4 = alloca , align 16 - %vec5 = alloca , align 16 - %vec6 = alloca , align 16 - %vec7 = alloca , align 16 - %vec8 = alloca , align 16 - %vec9 = alloca , align 16 - %vec10 = alloca , align 16 - %vec11 = alloca , align 16 - %vec12 = alloca , align 16 - %vec13 = alloca , align 16 - %vec14 = alloca , align 16 - %vec15 = alloca , align 16 - %vec16 = alloca , align 16 - %vec17 = alloca , align 16 - ret void -} - -; Space for callee-saved SVE register is allocated similarly to allocating -; space for SVE locals. When we know the stack adjustment cannot exceed the -; probe size we can skip the explict probe, since saving SVE registers serves -; as an implicit probe. -define void @sve_1v_csr( %a) #0 { -; CHECK-LABEL: sve_1v_csr: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: str z8, [sp] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG -; CHECK-NEXT: //APP -; CHECK-NEXT: //NO_APP -; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: .cfi_restore z8 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - call void asm sideeffect "", "~{z8}" () - ret void -} - -define void @sve_4v_csr( %a) #0 { -; CHECK-LABEL: sve_4v_csr: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG -; CHECK-NEXT: str z11, [sp] // 16-byte Folded Spill -; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG -; CHECK-NEXT: //APP -; CHECK-NEXT: //NO_APP -; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: .cfi_restore z8 -; CHECK-NEXT: .cfi_restore z9 -; CHECK-NEXT: .cfi_restore z10 -; CHECK-NEXT: .cfi_restore z11 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - call void asm sideeffect "", "~{z8},~{z9},~{z10},~{z11}" () - ret void -} - -define void @sve_16v_csr( %a) #0 { -; CHECK-LABEL: sve_16v_csr: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: addvl sp, sp, #-16 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 128 * VG -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill -; CHECK-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG -; CHECK-NEXT: //APP -; CHECK-NEXT: //NO_APP -; CHECK-NEXT: ldr z23, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z16, [sp, #7, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: .cfi_restore z8 -; CHECK-NEXT: .cfi_restore z9 -; CHECK-NEXT: .cfi_restore z10 -; CHECK-NEXT: .cfi_restore z11 -; CHECK-NEXT: .cfi_restore z12 -; CHECK-NEXT: .cfi_restore z13 -; CHECK-NEXT: .cfi_restore z14 -; CHECK-NEXT: .cfi_restore z15 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - call void asm sideeffect "", "~{z8},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23}" () - ret void -} - -define void @sve_1p_csr( %a) #0 { -; CHECK-LABEL: sve_1p_csr: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: //APP -; CHECK-NEXT: //NO_APP -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - call void asm sideeffect "", "~{p8}" () - ret void -} - -define void @sve_4p_csr( %a) #0 { -; CHECK-LABEL: sve_4p_csr: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: str p11, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: //APP -; CHECK-NEXT: //NO_APP -; CHECK-NEXT: ldr p11, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - call void asm sideeffect "", "~{p8},~{p9},~{p10},~{p11}" () - ret void -} - -define void @sve_16v_1p_csr( %a) #0 { -; CHECK-LABEL: sve_16v_1p_csr: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: addvl x9, sp, #-17 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 136 * VG -; CHECK-NEXT: .LBB9_1: // %entry -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: cmp sp, x9 -; CHECK-NEXT: b.le .LBB9_3 -; CHECK-NEXT: // %bb.2: // %entry -; CHECK-NEXT: // in Loop: Header=BB9_1 Depth=1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: b .LBB9_1 -; CHECK-NEXT: .LBB9_3: // %entry -; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: .cfi_def_cfa_register wsp -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG -; CHECK-NEXT: //APP -; CHECK-NEXT: //NO_APP -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #17 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: .cfi_restore z8 -; CHECK-NEXT: .cfi_restore z9 -; CHECK-NEXT: .cfi_restore z10 -; CHECK-NEXT: .cfi_restore z11 -; CHECK-NEXT: .cfi_restore z12 -; CHECK-NEXT: .cfi_restore z13 -; CHECK-NEXT: .cfi_restore z14 -; CHECK-NEXT: .cfi_restore z15 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - call void asm sideeffect "", "~{p8},~{z8},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23}" () - ret void -} - -; A SVE vector and a 16-byte fixed size object. -define void @sve_1_vector_16_arr(ptr %out) #0 { -; CHECK-LABEL: sve_1_vector_16_arr: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: .cfi_def_cfa wsp, 32 -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %vec = alloca , align 16 - %arr = alloca i8, i64 16, align 1 - ret void -} - -; A large SVE stack object and a large stack slot, both of which need probing. -; TODO: This could be optimised by combining the fixed-size offset into the -; loop. -define void @sve_1_vector_4096_arr(ptr %out) #0 { -; CHECK-LABEL: sve_1_vector_4096_arr: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub x9, sp, #3, lsl #12 // =12288 -; CHECK-NEXT: .cfi_def_cfa w9, 12304 -; CHECK-NEXT: addvl x9, x9, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0f, 0x79, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x02, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 12304 + 256 * VG -; CHECK-NEXT: addvl x9, x9, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0f, 0x79, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x04, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 12304 + 512 * VG -; CHECK-NEXT: .LBB11_1: // %entry -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: cmp sp, x9 -; CHECK-NEXT: b.le .LBB11_3 -; CHECK-NEXT: // %bb.2: // %entry -; CHECK-NEXT: // in Loop: Header=BB11_1 Depth=1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: b .LBB11_1 -; CHECK-NEXT: .LBB11_3: // %entry -; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: .cfi_def_cfa_register wsp -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0f, 0x8f, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x88, 0x02, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 12304 + 264 * VG -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 12304 + 16 * VG -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: .cfi_def_cfa wsp, 12304 -; CHECK-NEXT: add sp, sp, #3, lsl #12 // =12288 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %vec = alloca , align 16 - %arr = alloca i8, i64 12288, align 1 - ret void -} - -; Not tested: SVE stack objects with alignment >16 bytes, which isn't currently -; supported even without stack-probing. - -; An SVE vector, and a 16-byte fixed size object, which -; has a large alignment requirement. -define void @sve_1_vector_16_arr_align_8192(ptr %out) #0 { -; CHECK-LABEL: sve_1_vector_16_arr_align_8192: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub x9, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: sub x9, x9, #4080 -; CHECK-NEXT: addvl x9, x9, #-1 -; CHECK-NEXT: and x9, x9, #0xffffffffffffe000 -; CHECK-NEXT: .LBB12_1: // %entry -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: cmp sp, x9 -; CHECK-NEXT: b.le .LBB12_3 -; CHECK-NEXT: // %bb.2: // %entry -; CHECK-NEXT: // in Loop: Header=BB12_1 Depth=1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: b .LBB12_1 -; CHECK-NEXT: .LBB12_3: // %entry -; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %vec = alloca , align 16 - %arr = alloca i8, i64 16, align 8192 - ret void -} - -; With 64k guard pages, we can allocate bigger SVE space without a probing loop. -define void @sve_1024_64k_guard(ptr %out) #0 "stack-probe-size"="65536" { -; CHECK-LABEL: sve_1024_64k_guard: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: addvl sp, sp, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x02, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 256 * VG -; CHECK-NEXT: addvl sp, sp, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x04, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 512 * VG -; CHECK-NEXT: addvl sp, sp, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x06, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 768 * VG -; CHECK-NEXT: addvl sp, sp, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1024 * VG -; CHECK-NEXT: addvl sp, sp, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x0a, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1280 * VG -; CHECK-NEXT: addvl sp, sp, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x0c, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1536 * VG -; CHECK-NEXT: addvl sp, sp, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x0e, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1792 * VG -; CHECK-NEXT: addvl sp, sp, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 2048 * VG -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x0e, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1800 * VG -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x0c, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1552 * VG -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x98, 0x0a, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1304 * VG -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1056 * VG -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa8, 0x06, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 808 * VG -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xb0, 0x04, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 560 * VG -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xb8, 0x02, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 312 * VG -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 64 * VG -; CHECK-NEXT: addvl sp, sp, #8 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %vec = alloca , align 16 - ret void -} - -define void @sve_1028_64k_guard(ptr %out) #0 "stack-probe-size"="65536" { -; CHECK-LABEL: sve_1028_64k_guard: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: addvl x9, sp, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x02, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 256 * VG -; CHECK-NEXT: addvl x9, x9, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x04, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 512 * VG -; CHECK-NEXT: addvl x9, x9, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x06, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 768 * VG -; CHECK-NEXT: addvl x9, x9, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 1024 * VG -; CHECK-NEXT: addvl x9, x9, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x0a, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 1280 * VG -; CHECK-NEXT: addvl x9, x9, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x0c, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 1536 * VG -; CHECK-NEXT: addvl x9, x9, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x0e, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 1792 * VG -; CHECK-NEXT: addvl x9, x9, #-32 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 2048 * VG -; CHECK-NEXT: addvl x9, x9, #-1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 2056 * VG -; CHECK-NEXT: .LBB14_1: // %entry -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEXT: cmp sp, x9 -; CHECK-NEXT: b.le .LBB14_3 -; CHECK-NEXT: // %bb.2: // %entry -; CHECK-NEXT: // in Loop: Header=BB14_1 Depth=1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: b .LBB14_1 -; CHECK-NEXT: .LBB14_3: // %entry -; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: .cfi_def_cfa_register wsp -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x0e, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1808 * VG -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x98, 0x0c, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1560 * VG -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x0a, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1312 * VG -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa8, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1064 * VG -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xb0, 0x06, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 816 * VG -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xb8, 0x04, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 568 * VG -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x02, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 320 * VG -; CHECK-NEXT: addvl sp, sp, #31 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc8, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 72 * VG -; CHECK-NEXT: addvl sp, sp, #9 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %vec = alloca , align 16 - %vec1 = alloca , align 16 - ret void -} - -; With 5 SVE vectors of stack space the unprobed area -; at the top of the stack can exceed 1024 bytes (5 x 256 == 1280), -; hence we need to issue a probe. -define void @sve_5_vector(ptr %out) #0 { -; CHECK-LABEL: sve_5_vector: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: addvl sp, sp, #-5 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 40 * VG -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: addvl sp, sp, #5 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %vec1 = alloca , align 16 - %vec2 = alloca , align 16 - %vec3 = alloca , align 16 - %vec4 = alloca , align 16 - %vec5 = alloca , align 16 - ret void -} - -; Test with a 14 scalable bytes (so up to 14 * 16 = 224) of unprobed -; are bellow the save location of `p9`. -define void @sve_unprobed_area( %a, i32 %n) #0 { -; CHECK-LABEL: sve_unprobed_area: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: str p9, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG -; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG -; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 64 * VG -; CHECK-NEXT: //APP -; CHECK-NEXT: //NO_APP -; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG -; CHECK-NEXT: ldr p9, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: .cfi_restore z8 -; CHECK-NEXT: .cfi_restore z9 -; CHECK-NEXT: .cfi_restore z10 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - call void asm sideeffect "", "~{z8},~{z9},~{z10},~{p9}" () - - %v0 = alloca , align 16 - %v1 = alloca , align 16 - %v2 = alloca , align 16 - %v3 = alloca , align 16 - - ret void -} - -attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" "target-features"="+sve" } \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/stack-probing.ll b/llvm/test/CodeGen/AArch64/stack-probing.ll deleted file mode 100644 index 95001450622f433a2a5ae05f330d5789ff8f58c5..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/AArch64/stack-probing.ll +++ /dev/null @@ -1,539 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs -enable-post-misched=false | FileCheck %s -; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs -enable-post-misched=false -global-isel | FileCheck %s - -; Tests for prolog sequences for stack probing, when using a 4KiB stack guard. - -; The stack probing parameters in function attributes take precedence over -; ones in the module flags. - -; Small stack frame, no probing required. -define void @static_64(ptr %out) #0 { -; CHECK-LABEL: static_64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 64, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -; At 256 bytes we start to always create a frame pointer. No frame smaller then -; this needs a probe, so we can use the saving of at least one CSR as a probe -; at the top of our frame. -define void @static_256(ptr %out) #0 { -; CHECK-LABEL: static_256: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #272 -; CHECK-NEXT: .cfi_def_cfa_offset 272 -; CHECK-NEXT: str x29, [sp, #256] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #272 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 256, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -; At 1024 bytes, this is the largest frame which doesn't need probing. -define void @static_1024(ptr %out) #0 { -; CHECK-LABEL: static_1024: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub sp, sp, #1024 -; CHECK-NEXT: .cfi_def_cfa_offset 1040 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #1024 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 1024, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -; At 1024+16 bytes, this is the smallest frame which needs probing. -define void @static_1040(ptr %out) #0 { -; CHECK-LABEL: static_1040: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub sp, sp, #1040 -; CHECK-NEXT: .cfi_def_cfa_offset 1056 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #1040 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 1040, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -; 4k bytes is the largest frame we can probe in one go. -define void @static_4096(ptr %out) #0 { -; CHECK-LABEL: static_4096: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: .cfi_def_cfa_offset 4112 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 4096, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -; 4k+16 bytes, still needs just one probe. -define void @static_4112(ptr %out) #0 { -; CHECK-LABEL: static_4112: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: .cfi_def_cfa_offset 4112 -; CHECK-NEXT: str xzr, [sp], #-16 -; CHECK-NEXT: .cfi_def_cfa_offset 4128 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 4112, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -; 4k+1024 bytes, the largest frame which needs just one probe. -define void @static_5120(ptr %out) #0 { -; CHECK-LABEL: static_5120: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: .cfi_def_cfa_offset 4112 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #1024 -; CHECK-NEXT: .cfi_def_cfa_offset 5136 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: .cfi_def_cfa_offset 1040 -; CHECK-NEXT: add sp, sp, #1024 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 5120, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -; 4k+1024+16, the smallest frame which needs two probes. -define void @static_5136(ptr %out) #0 { -; CHECK-LABEL: static_5136: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: .cfi_def_cfa_offset 4112 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #1040 -; CHECK-NEXT: .cfi_def_cfa_offset 5152 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: .cfi_def_cfa_offset 1056 -; CHECK-NEXT: add sp, sp, #1040 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 5136, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -; 2*4k+1024, the largest frame needing two probes -define void @static_9216(ptr %out) #0 { -; CHECK-LABEL: static_9216: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: .cfi_def_cfa_offset 4112 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: .cfi_def_cfa_offset 8208 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #1024 -; CHECK-NEXT: .cfi_def_cfa_offset 9232 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #2, lsl #12 // =8192 -; CHECK-NEXT: .cfi_def_cfa_offset 1040 -; CHECK-NEXT: add sp, sp, #1024 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 9216, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -; 5*4k-16, the largest frame probed without a loop -define void @static_20464(ptr %out) #0 { -; CHECK-LABEL: static_20464: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: .cfi_def_cfa_offset 4112 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: .cfi_def_cfa_offset 8208 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: .cfi_def_cfa_offset 12304 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: .cfi_def_cfa_offset 16400 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #4080 -; CHECK-NEXT: .cfi_def_cfa_offset 20480 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #4, lsl #12 // =16384 -; CHECK-NEXT: .cfi_def_cfa_offset 4096 -; CHECK-NEXT: add sp, sp, #4080 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 20464, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -; 5*4k, the smallest frame probed with a loop -define void @static_20480(ptr %out) #0 { -; CHECK-LABEL: static_20480: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub x9, sp, #5, lsl #12 // =20480 -; CHECK-NEXT: .cfi_def_cfa w9, 20496 -; CHECK-NEXT: .LBB10_1: // %entry -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: cmp sp, x9 -; CHECK-NEXT: b.ne .LBB10_1 -; CHECK-NEXT: // %bb.2: // %entry -; CHECK-NEXT: .cfi_def_cfa_register wsp -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #5, lsl #12 // =20480 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 20480, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -; 5*4k + 1024, large enough to use a loop, but not a multiple of 4KiB -; so has a reminder, but no extra probe. -define void @static_21504(ptr %out) #0 { -; CHECK-LABEL: static_21504: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub x9, sp, #5, lsl #12 // =20480 -; CHECK-NEXT: .cfi_def_cfa w9, 20496 -; CHECK-NEXT: .LBB11_1: // %entry -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: cmp sp, x9 -; CHECK-NEXT: b.ne .LBB11_1 -; CHECK-NEXT: // %bb.2: // %entry -; CHECK-NEXT: .cfi_def_cfa_register wsp -; CHECK-NEXT: sub sp, sp, #1024 -; CHECK-NEXT: .cfi_def_cfa_offset 21520 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #5, lsl #12 // =20480 -; CHECK-NEXT: .cfi_def_cfa_offset 1040 -; CHECK-NEXT: add sp, sp, #1024 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 21504, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -; 5*4k+1040, large enough to use a loop, has a reminder and -; an extra probe. -define void @static_21520(ptr %out) #0 { -; CHECK-LABEL: static_21520: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub x9, sp, #5, lsl #12 // =20480 -; CHECK-NEXT: .cfi_def_cfa w9, 20496 -; CHECK-NEXT: .LBB12_1: // %entry -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: cmp sp, x9 -; CHECK-NEXT: b.ne .LBB12_1 -; CHECK-NEXT: // %bb.2: // %entry -; CHECK-NEXT: .cfi_def_cfa_register wsp -; CHECK-NEXT: sub sp, sp, #1040 -; CHECK-NEXT: .cfi_def_cfa_offset 21536 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #5, lsl #12 // =20480 -; CHECK-NEXT: .cfi_def_cfa_offset 1056 -; CHECK-NEXT: add sp, sp, #1040 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 21520, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -; A small allocation, but with a very large alignment requirement. We do this -; by moving SP far enough that a sufficiently-aligned block will exist -; somewhere in the stack frame, so must probe the whole of that larger SP move. -define void @static_16_align_8192(ptr %out) #0 { -; CHECK-LABEL: static_16_align_8192: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub x9, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: sub x9, x9, #4080 -; CHECK-NEXT: and x9, x9, #0xffffffffffffe000 -; CHECK-NEXT: .LBB13_1: // %entry -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -; CHECK-NEXT: cmp sp, x9 -; CHECK-NEXT: b.le .LBB13_3 -; CHECK-NEXT: // %bb.2: // %entry -; CHECK-NEXT: // in Loop: Header=BB13_1 Depth=1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: b .LBB13_1 -; CHECK-NEXT: .LBB13_3: // %entry -; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 16, align 8192 - store ptr %v, ptr %out, align 8 - ret void -} - -; A small allocation with a very large alignment requirement, but -; nevertheless small enough as to not need a loop. -define void @static_16_align_2048(ptr %out) #0 { -; CHECK-LABEL: static_16_align_2048: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub x9, sp, #2032 -; CHECK-NEXT: and sp, x9, #0xfffffffffffff800 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 16, align 2048 - store ptr %v, ptr %out, align 8 - ret void -} - -; A large(-ish) allocation with a very large alignment requirement, but -; nevertheless small enough as to not need a loop. -define void @static_2032_align_2048(ptr %out) #0 { -; CHECK-LABEL: static_2032_align_2048: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub x9, sp, #2032 -; CHECK-NEXT: and sp, x9, #0xfffffffffffff800 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 2032, align 2048 - store ptr %v, ptr %out, align 8 - ret void -} - -; Test stack probing is enabled by module flags -define void @static_9232(ptr %out) uwtable(async) { -; CHECK-LABEL: static_9232: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub sp, sp, #2, lsl #12 // =8192 -; CHECK-NEXT: .cfi_def_cfa_offset 8208 -; CHECK-NEXT: sub sp, sp, #800 -; CHECK-NEXT: .cfi_def_cfa_offset 9008 -; CHECK-NEXT: str xzr, [sp], #-240 -; CHECK-NEXT: .cfi_def_cfa_offset 9248 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: add sp, sp, #2, lsl #12 // =8192 -; CHECK-NEXT: .cfi_def_cfa_offset 1056 -; CHECK-NEXT: add sp, sp, #1040 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i64 9232, align 1 - store ptr %v, ptr %out, align 8 - ret void -} - -; Test for a tight upper bound on the amount of stack adjustment -; due to stack realignment. No probes should appear. -define void @static_1008(ptr %out) #0 { -; CHECK-LABEL: static_1008: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub x9, sp, #1008 -; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 -; CHECK-NEXT: ret -entry: - %v = alloca i8, i32 1008, align 32 - store ptr %v, ptr %out, align 8 - ret void -} - -attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "stack-probe-size"="4096" "frame-pointer"="none" } - -!llvm.module.flags = !{!0, !1} - -!0 = !{i32 4, !"probe-stack", !"inline-asm"} -!1 = !{i32 8, !"stack-probe-size", i32 9000} \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll index 235364ac232183c53157ad6121fb9f1747949664..a97649523565dc0f3fdb7053ee49cbdb5b68f68d 100644 --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -56,8 +56,8 @@ define float @foo2(ptr %x0, ptr %x1) nounwind { ; CHECK-LABEL: foo2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-4 +; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ld4d { z1.d - z4.d }, p0/z, [x0] @@ -699,8 +699,8 @@ define void @verify_all_operands_are_initialised() { ; CHECK-LABEL: verify_all_operands_are_initialised: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll index b3529549c22b9cd0d19597687bbc6cc78a7c26ae..31ff9287046cd5bc3ad89ea56a95610b1773d95c 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll @@ -9,8 +9,8 @@ define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: sub sp, sp, #48 ; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: sub sp, sp, #48 ; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: add x8, sp, #48 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] @@ -59,8 +59,8 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: sub sp, sp, #128 ; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: sub sp, sp, #128 ; CHECK-NEXT: ldr q1, [x0, #64] ; CHECK-NEXT: mov x19, x1 ; CHECK-NEXT: ldr q0, [x0, #80] diff --git a/llvm/test/CodeGen/X86/GlobalISel/stacksave-stackrestore.ll b/llvm/test/CodeGen/X86/GlobalISel/stacksave-stackrestore.ll deleted file mode 100644 index 8f665924577f5bcb6a5d530c360cb2967cf0b9c8..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/stacksave-stackrestore.ll +++ /dev/null @@ -1,43 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -global-isel=1 -mtriple=x86_64-linux-gnu -o - %s | FileCheck %s - -declare void @use_addr(ptr) -declare ptr @llvm.stacksave.p0() -declare void @llvm.stackrestore.p0(ptr) - -define void @test_scoped_alloca(i64 %n) { -; CHECK-LABEL: test_scoped_alloca: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_def_cfa_register %rbp -; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: .cfi_offset %rbx, -32 -; CHECK-NEXT: .cfi_offset %r14, -24 -; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: callq llvm.stacksave.p0 -; CHECK-NEXT: movq %rax, %r14 -; CHECK-NEXT: movq %rsp, %rdi -; CHECK-NEXT: imulq $1, %rbx, %rax -; CHECK-NEXT: addq $15, %rax -; CHECK-NEXT: andq $-16, %rax -; CHECK-NEXT: subq %rax, %rdi -; CHECK-NEXT: movq %rdi, %rsp -; CHECK-NEXT: callq use_addr -; CHECK-NEXT: movq %r14, %rdi -; CHECK-NEXT: callq llvm.stackrestore.p0 -; CHECK-NEXT: leaq -16(%rbp), %rsp -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: popq %r14 -; CHECK-NEXT: popq %rbp -; CHECK-NEXT: .cfi_def_cfa %rsp, 8 -; CHECK-NEXT: retq - %sp = call ptr @llvm.stacksave.p0() - %addr = alloca i8, i64 %n - call void @use_addr(ptr %addr) - call void @llvm.stackrestore.p0(ptr %sp) - ret void -}