From 5e74778aed7a92f9fa134e767f2dbc1555e37ebb Mon Sep 17 00:00:00 2001 From: Daniel Hoekwater Date: Tue, 1 Aug 2023 23:55:31 +0000 Subject: [PATCH 1/3] [AArch64][CodeGen] Avoid inverting hot branches during relaxation Current behavior for relaxing out-of-range conditional branches is to invert the conditional and insert a fallthrough unconditional branch to the original destination. This approach biases the branch predictor in the wrong direction, which can degrading performance. Machine function splitting introduces many rarely-taken cross-section conditional branches, which are improperly relaxed. Avoid inverting these branches; instead, retarget them to trampolines at the end of the function. Doing so increases the runtime cost of jumping to cold code but eliminates the misprediction cost of jumping to hot code. Differential Revision: https://reviews.llvm.org/D156837 --- llvm/lib/CodeGen/BranchRelaxation.cpp | 63 +++- .../AArch64/branch-relax-cross-section.mir | 268 +++++++++++++++++- 2 files changed, 328 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp index 378f8bfda203..2995732e0aa8 100644 --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -79,6 +79,10 @@ class BranchRelaxation : public MachineFunctionPass { }; SmallVector BlockInfo; + + // The basic block after which trampolines are inserted. This is the last + // basic block that isn't in the cold section. + MachineBasicBlock *TrampolineInsertionPoint = nullptr; std::unique_ptr RS; LivePhysRegs LiveRegs; @@ -166,16 +170,27 @@ LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() { void BranchRelaxation::scanFunction() { BlockInfo.clear(); BlockInfo.resize(MF->getNumBlockIDs()); + TrampolineInsertionPoint = nullptr; // First thing, compute the size of all basic blocks, and see if the function // has any inline assembly in it. If so, we have to be conservative about // alignment assumptions, as we don't know for sure the size of any - // instructions in the inline assembly. - for (MachineBasicBlock &MBB : *MF) + // instructions in the inline assembly. At the same time, place the + // trampoline insertion point at the end of the hot portion of the function. + for (MachineBasicBlock &MBB : *MF) { BlockInfo[MBB.getNumber()].Size = computeBlockSize(MBB); + if (MBB.getSectionID() != MBBSectionID::ColdSectionID) + TrampolineInsertionPoint = &MBB; + } + // Compute block offsets and known bits. adjustBlockOffsets(*MF->begin()); + + if (TrampolineInsertionPoint == nullptr) { + LLVM_DEBUG(dbgs() << " No suitable trampoline insertion point found in " + << MF->getName() << ".\n"); + } } /// computeBlockSize - Compute the size for MBB. @@ -376,6 +391,50 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { assert(!Fail && "branches to be relaxed must be analyzable"); (void)Fail; + // Since cross-section conditional branches to the cold section are rarely + // taken, try to avoid inverting the condition. Instead, add a "trampoline + // branch", which unconditionally branches to the branch destination. Place + // the trampoline branch at the end of the function and retarget the + // conditional branch to the trampoline. + // tbz L1 + // => + // tbz L1Trampoline + // ... + // L1Trampoline: b L1 + if (MBB->getSectionID() != TBB->getSectionID() && + TBB->getSectionID() == MBBSectionID::ColdSectionID && + TrampolineInsertionPoint != nullptr) { + // If the insertion point is out of range, we can't put a trampoline there. 
+ NewBB = + createNewBlockAfter(*TrampolineInsertionPoint, MBB->getBasicBlock()); + + if (isBlockInRange(MI, *NewBB)) { + LLVM_DEBUG(dbgs() << " Retarget destination to trampoline at " + << NewBB->back()); + + insertUncondBranch(NewBB, TBB); + + // Update the successor lists to include the trampoline. + MBB->replaceSuccessor(TBB, NewBB); + NewBB->addSuccessor(TBB); + + // Replace branch in the current (MBB) block. + removeBranch(MBB); + insertBranch(MBB, NewBB, FBB, Cond); + + TrampolineInsertionPoint = NewBB; + finalizeBlockChanges(MBB, NewBB); + return true; + } + + LLVM_DEBUG( + dbgs() << " Trampoline insertion point out of range for Bcc from " + << printMBBReference(*MBB) << " to " << printMBBReference(*TBB) + << ".\n"); + TrampolineInsertionPoint->setIsEndSection(NewBB->isEndSection()); + MF->erase(NewBB); + } + // Add an unconditional branch to the destination and invert the branch // condition to jump over it: // tbz L1 diff --git a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir index 1cf307cd16ec..231bc886dd3b 100644 --- a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir +++ b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir @@ -1,4 +1,5 @@ -# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-b-offset-bits=64 %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-b-offset-bits=64 -aarch64-tbz-offset-bits=9 -aarch64-cbz-offset-bits=9 %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-tbz-offset-bits=9 -aarch64-cbz-offset-bits=9 %s -o - | FileCheck --check-prefix=INDIRECT %s --- | declare i32 @bar() @@ -21,6 +22,73 @@ br label %end } + define void @tbz_hot_to_cold(i1 zeroext %0) { + br i1 %0, label %hot_block, label %cold_block + + hot_block: ; preds = %1 + %2 = call i32 @baz() + br label %end + + end: ; preds = %cold_block, %hot_block + %3 = tail call i32 @qux() + ret void + + cold_block: ; preds = %1 + %4 = call i32 @bar() + br label %end + } + + define void @tbz_no_valid_tramp(i1 zeroext %0) { + br i1 %0, label %hot, label %cold + + hot: ; preds = %1 + %2 = call i32 @baz() + call void asm sideeffect ".space 1024", ""() + br label %end + + end: ; preds = %cold, %hot + %3 = tail call i32 @qux() + ret void + + cold: ; preds = %1 + %4 = call i32 @bar() + br label %end + } + + define void @tbz_cold_to_hot(i1 zeroext %0) #0 { + br i1 %0, label %cold_block, label %hot_block + + cold_block: ; preds = %1 + %2 = call i32 @baz() + br label %end + + end: ; preds = %hot_block, %cold_block + %3 = tail call i32 @qux() + ret void + + hot_block: ; preds = %1 + %4 = call i32 @bar() + br label %end + } + + define void @tbz_tramp_pushed_oob(i1 zeroext %0, i1 zeroext %1) { + entry: + %x16 = call i64 asm sideeffect "mov x16, 1", "={x16}"() + br i1 %0, label %unrelaxable, label %cold + + unrelaxable: ; preds = %entry + br i1 %1, label %end, label %cold + + end: ; preds = %unrelaxable + call void asm sideeffect ".space 996", ""() + call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16) + ret void + + cold: ; preds = %entry, %unrelaxable + call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16) + ret void + } + ... --- name: relax_tbz @@ -69,3 +137,201 @@ body: | early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp ... 
+--- +name: tbz_hot_to_cold +tracksRegLiveness: true +liveins: + - { reg: '$w0', virtual-reg: '' } +stack: + - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + ; CHECK-LABEL: name: tbz_hot_to_cold + ; COM: Check that branch relaxation relaxes cross-section conditional + ; COM: branches by creating trampolines after all other hot basic blocks. + ; CHECK: bb.0 (%ir-block.1): + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK: TBZW + ; CHECK-SAME: %bb.3 + ; CHECK: bb.1.hot_block: + ; CHECK: TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK: bb.3 (%ir-block.1): + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.cold_block (bbsections Cold): + ; CHECK: TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + bb.0 (%ir-block.1): + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $w0, $lr + + early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) + TBZW killed renamable $w0, 0, %bb.2 + + bb.1.hot_block: + BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + + bb.2.cold_block (bbsections Cold): + BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + +... +--- +name: tbz_no_valid_tramp +tracksRegLiveness: true +liveins: + - { reg: '$w0', virtual-reg: '' } +stack: + - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +machineFunctionInfo: + hasRedZone: false +body: | + ; CHECK-LABEL: name: tbz_no_valid_tramp + ; COM: Check that branch relaxation doesn't insert a trampoline if there is no + ; COM: viable insertion location. + ; CHECK: bb.0 (%ir-block.1): + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK: CBNZW + ; CHECK-SAME: %bb.1 + ; CHECK-NEXT: B + ; CHECK-SAME: %bb.3 + ; CHECK: bb.1.hot: + ; CHECK: TCRETURNdi + ; CHECK: bb.2.cold (bbsections Cold): + ; CHECK: TCRETURNdi + bb.0 (%ir-block.1): + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $w0, $lr + + early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) + CBZW killed renamable $w0, %bb.2 + + bb.1.hot: + BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + INLINEASM &".space 1024", 1 /* sideeffect attdialect */ + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + + bb.2.cold (bbsections Cold): + BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + +... 
+--- +name: tbz_cold_to_hot +tracksRegLiveness: true +liveins: + - { reg: '$w0', virtual-reg: '' } +stack: + - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +machineFunctionInfo: + hasRedZone: false +body: | + ; CHECK-LABEL: name: tbz_cold_to_hot + ; COM: Check that relaxation of conditional branches from the Cold section to + ; COM: the Hot section doesn't modify the Hot section. + ; CHECK: bb.0 (%ir-block.1, bbsections Cold): + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: CBNZW + ; CHECK-SAME: %bb.1 + ; CHECK-NEXT: B %bb.2 + ; CHECK: bb.1.cold_block (bbsections Cold): + ; CHECK: TCRETURNdi + ; CHECK: bb.2.hot_block: + ; CHECK: TCRETURNdi + bb.0 (%ir-block.1, bbsections Cold): + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $w0, $lr + + early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) + CBZW killed renamable $w0, %bb.2 + + bb.1.cold_block (bbsections Cold): + BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + + bb.2.hot_block: + BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + +... +--- +name: tbz_tramp_pushed_oob +tracksRegLiveness: true +liveins: + - { reg: '$w0', virtual-reg: '' } + - { reg: '$w1', virtual-reg: '' } +stack: + - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +machineFunctionInfo: + hasRedZone: false +body: | + ; INDIRECT-LABEL: name: tbz_tramp_pushed_oob + ; COM: Check that a conditional branch to a trampoline is properly relaxed + ; COM: if the trampoline is pushed out of range. 
+ ; INDIRECT: bb.0.entry: + ; INDIRECT-NEXT: successors: %bb.1(0x40000000), %[[TRAMP1:bb.[0-9]+]](0x40000000) + ; INDIRECT: TBNZW + ; INDIRECT-SAME: %bb.1 + ; INDIRECT-NEXT: B{{ }} + ; INDIRECT-SAME: %[[TRAMP1]] + ; INDIRECT: bb.1.unrelaxable: + ; INDIRECT-NEXT: successors: %bb.2(0x40000000), %[[TRAMP2:bb.[0-9]+]](0x40000000) + ; INDIRECT: TBNZW + ; INDIRECT-SAME: %bb.2 + ; INDIRECT: [[TRAMP2]] + ; INDIRECT-NEXT: successors: %bb.3(0x80000000) + ; INDIRECT: bb.2.end: + ; INDIRECT: TCRETURNdi + ; INDIRECT: [[TRAMP1]].entry: + ; INDIRECT: successors: %bb.3(0x80000000) + ; INDIRECT-NOT: bbsections Cold + ; INDIRECT: bb.3.cold (bbsections Cold): + ; INDIRECT: TCRETURNdi + + bb.0.entry (%ir-block.entry): + successors: %bb.1(0x40000000), %bb.3(0x40000000) + liveins: $w0, $w1, $lr + + early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) + INLINEASM &"mov x16, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x16 + TBZW killed renamable $w0, 0, %bb.3 + + bb.1.unrelaxable: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + liveins: $w1, $x16 + + TBNZW killed renamable $w1, 0, %bb.2 + + B %bb.3 + + bb.2.end: + liveins: $x16 + + INLINEASM &".space 996", 1 /* sideeffect attdialect */ + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x16 + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + + bb.3.cold (bbsections Cold): + liveins: $x16 + + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x16 + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + +... -- Gitee From 2ecf41011907a3fe5349cb6ee065bc55903a411c Mon Sep 17 00:00:00 2001 From: Daniel Hoekwater Date: Fri, 25 Aug 2023 19:19:20 +0000 Subject: [PATCH 2/3] [CodeGen][AArch64] Precommit tests for D156767 (NFC) Differential Revision: https://reviews.llvm.org/D158871 --- llvm/test/CodeGen/AArch64/branch-relax-b.ll | 46 ++- .../AArch64/branch-relax-cross-section.mir | 376 +++++++++++++++++- 2 files changed, 401 insertions(+), 21 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/branch-relax-b.ll b/llvm/test/CodeGen/AArch64/branch-relax-b.ll index cbf0152d811d..3f1501c01451 100644 --- a/llvm/test/CodeGen/AArch64/branch-relax-b.ll +++ b/llvm/test/CodeGen/AArch64/branch-relax-b.ll @@ -44,16 +44,16 @@ define void @relax_b_spill() { ; CHECK-NEXT: // %bb.4: // %entry ; CHECK-NEXT: str [[SPILL_REGISTER:x[0-9]+]], [sp, ; CHECK-SAME: -16]! -; CHECK-NEXT: adrp [[SPILL_REGISTER:x[0-9]+]], .LBB1_5 -; CHECK-NEXT: add [[SPILL_REGISTER:x[0-9]+]], [[SPILL_REGISTER:x[0-9]+]], :lo12:.LBB1_5 -; CHECK-NEXT: br [[SPILL_REGISTER:x[0-9]+]] +; CHECK-NEXT: adrp [[SPILL_REGISTER]], .LBB1_5 +; CHECK-NEXT: add [[SPILL_REGISTER]], [[SPILL_REGISTER]], :lo12:.LBB1_5 +; CHECK-NEXT: br [[SPILL_REGISTER]] ; CHECK-NEXT: .LBB1_1: // %iftrue ; CHECK-NEXT: //APP ; CHECK-NEXT: .zero 2048 ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: b .LBB1_3 ; CHECK-NEXT: .LBB1_5: // %iffalse -; CHECK-NEXT: ldr [[SPILL_REGISTER:x[0-9]+]], [sp], +; CHECK-NEXT: ldr [[SPILL_REGISTER]], [sp], ; CHECK-SAME: 16 ; CHECK-NEXT: // %bb.2: // %iffalse ; CHECK-NEXT: //APP @@ -135,5 +135,43 @@ iffalse: ret void } +define void @relax_b_x16_taken() { +; CHECK-LABEL: relax_b_x16_taken: // @relax_b_x16_taken +; COM: Pre-commit to record the behavior of relaxing an unconditional +; COM: branch across which x16 is taken. 
+; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: mov x16, #1 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cbnz x16, .LBB2_1 +; CHECK-NEXT: // %bb.3: // %entry +; CHECK-NEXT: adrp [[SCAVENGED_REGISTER2:x[0-9]+]], .LBB2_2 +; CHECK-NEXT: add [[SCAVENGED_REGISTER2]], [[SCAVENGED_REGISTER2]], :lo12:.LBB2_2 +; CHECK-NEXT: br [[SCAVENGED_REGISTER2]] +; CHECK-NEXT: .LBB2_1: // %iftrue +; CHECK-NEXT: //APP +; CHECK-NEXT: .zero 2048 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_2: // %iffalse +; CHECK-NEXT: //APP +; CHECK-NEXT: // reg use x16 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ret +entry: + %x16 = call i64 asm sideeffect "mov x16, 1", "={x16}"() + + %cmp = icmp eq i64 %x16, 0 + br i1 %cmp, label %iffalse, label %iftrue + +iftrue: + call void asm sideeffect ".space 2048", ""() + ret void + +iffalse: + call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16) + ret void +} + declare i32 @bar() declare i32 @baz() \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir index 231bc886dd3b..e7a56d4c7a4d 100644 --- a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir +++ b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir @@ -55,7 +55,7 @@ br label %end } - define void @tbz_cold_to_hot(i1 zeroext %0) #0 { + define void @tbz_cold_to_hot(i1 zeroext %0) { br i1 %0, label %cold_block, label %hot_block cold_block: ; preds = %1 @@ -89,6 +89,91 @@ ret void } + + define void @x16_used_cold_to_hot() { + entry: + %x16 = call i64 asm sideeffect "mov x16, 1", "={x16}"() + %cmp = icmp eq i64 %x16, 0 + br i1 %cmp, label %hot, label %cold + + hot: ; preds = %cold, %entry + call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16) + ret void + + cold: ; preds = %entry + call void asm sideeffect ".space 4", ""() + br label %hot + } + + define void @all_used_cold_to_hot() { + entry: + %x0 = call i64 asm sideeffect "mov x0, 1", "={x0}"() + %x1 = call i64 asm sideeffect "mov x1, 1", "={x1}"() + %x2 = call i64 asm sideeffect "mov x2, 1", "={x2}"() + %x3 = call i64 asm sideeffect "mov x3, 1", "={x3}"() + %x4 = call i64 asm sideeffect "mov x4, 1", "={x4}"() + %x5 = call i64 asm sideeffect "mov x5, 1", "={x5}"() + %x6 = call i64 asm sideeffect "mov x6, 1", "={x6}"() + %x7 = call i64 asm sideeffect "mov x7, 1", "={x7}"() + %x8 = call i64 asm sideeffect "mov x8, 1", "={x8}"() + %x9 = call i64 asm sideeffect "mov x9, 1", "={x9}"() + %x10 = call i64 asm sideeffect "mov x10, 1", "={x10}"() + %x11 = call i64 asm sideeffect "mov x11, 1", "={x11}"() + %x12 = call i64 asm sideeffect "mov x12, 1", "={x12}"() + %x13 = call i64 asm sideeffect "mov x13, 1", "={x13}"() + %x14 = call i64 asm sideeffect "mov x14, 1", "={x14}"() + %x15 = call i64 asm sideeffect "mov x15, 1", "={x15}"() + %x17 = call i64 asm sideeffect "mov x17, 1", "={x17}"() + %x18 = call i64 asm sideeffect "mov x18, 1", "={x18}"() + %x19 = call i64 asm sideeffect "mov x19, 1", "={x19}"() + %x20 = call i64 asm sideeffect "mov x20, 1", "={x20}"() + %x21 = call i64 asm sideeffect "mov x21, 1", "={x21}"() + %x22 = call i64 asm sideeffect "mov x22, 1", "={x22}"() + %x23 = call i64 asm sideeffect "mov x23, 1", "={x23}"() + %x24 = call i64 asm sideeffect "mov x24, 1", "={x24}"() + %x25 = call i64 asm sideeffect "mov x25, 1", "={x25}"() + %x26 = call i64 asm sideeffect "mov x26, 1", "={x26}"() + %x27 = call i64 asm sideeffect "mov x27, 1", "={x27}"() + %x28 = call i64 asm sideeffect "mov x28, 1", "={x28}"() + br label %cold + + exit: ; 
preds = %cold + call void asm sideeffect "# reg use $0", "{x0}"(i64 %x0) + call void asm sideeffect "# reg use $0", "{x1}"(i64 %x1) + call void asm sideeffect "# reg use $0", "{x2}"(i64 %x2) + call void asm sideeffect "# reg use $0", "{x3}"(i64 %x3) + call void asm sideeffect "# reg use $0", "{x4}"(i64 %x4) + call void asm sideeffect "# reg use $0", "{x5}"(i64 %x5) + call void asm sideeffect "# reg use $0", "{x6}"(i64 %x6) + call void asm sideeffect "# reg use $0", "{x7}"(i64 %x7) + call void asm sideeffect "# reg use $0", "{x8}"(i64 %x8) + call void asm sideeffect "# reg use $0", "{x9}"(i64 %x9) + call void asm sideeffect "# reg use $0", "{x10}"(i64 %x10) + call void asm sideeffect "# reg use $0", "{x11}"(i64 %x11) + call void asm sideeffect "# reg use $0", "{x12}"(i64 %x12) + call void asm sideeffect "# reg use $0", "{x13}"(i64 %x13) + call void asm sideeffect "# reg use $0", "{x14}"(i64 %x14) + call void asm sideeffect "# reg use $0", "{x15}"(i64 %x15) + call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16) + call void asm sideeffect "# reg use $0", "{x17}"(i64 %x17) + call void asm sideeffect "# reg use $0", "{x18}"(i64 %x18) + call void asm sideeffect "# reg use $0", "{x19}"(i64 %x19) + call void asm sideeffect "# reg use $0", "{x20}"(i64 %x20) + call void asm sideeffect "# reg use $0", "{x21}"(i64 %x21) + call void asm sideeffect "# reg use $0", "{x22}"(i64 %x22) + call void asm sideeffect "# reg use $0", "{x23}"(i64 %x23) + call void asm sideeffect "# reg use $0", "{x24}"(i64 %x24) + call void asm sideeffect "# reg use $0", "{x25}"(i64 %x25) + call void asm sideeffect "# reg use $0", "{x26}"(i64 %x26) + call void asm sideeffect "# reg use $0", "{x27}"(i64 %x27) + call void asm sideeffect "# reg use $0", "{x28}"(i64 %x28) + ret void + + cold: ; preds = %entry + %x16 = call i64 asm sideeffect "mov x16, 1", "={x16}"() + br label %exit + } + ... --- name: relax_tbz @@ -104,13 +189,13 @@ body: | ; COM: Check that cross-section conditional branches are ; COM: relaxed. ; CHECK: bb.0 (%ir-block.1, bbsections 1): - ; CHECK-NEXT: successors: %bb.3(0x40000000) + ; CHECK-NEXT: successors: %bb.3 ; CHECK: TBNZW ; CHECK-SAME: %bb.3 ; CHECK: B %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3 (%ir-block.1, bbsections 1): - ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: successors: %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: B %bb.1 ; CHECK-NEXT: {{ $}} @@ -120,7 +205,7 @@ body: | ; CHECK-NEXT: bb.2.true_block (bbsections 3): ; CHECK: TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp bb.0 (%ir-block.1, bbsections 1): - successors: %bb.1(0x40000000), %bb.2(0x40000000) + successors: %bb.1, %bb.2 liveins: $w0, $lr early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) @@ -151,20 +236,21 @@ body: | ; COM: Check that branch relaxation relaxes cross-section conditional ; COM: branches by creating trampolines after all other hot basic blocks. 
; CHECK: bb.0 (%ir-block.1): - ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: successors: %bb.1 + ; CHECK-SAME: , %bb.3 ; CHECK: TBZW ; CHECK-SAME: %bb.3 ; CHECK: bb.1.hot_block: ; CHECK: TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp ; CHECK: bb.3 (%ir-block.1): - ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: successors: %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.cold_block (bbsections Cold): ; CHECK: TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp bb.0 (%ir-block.1): - successors: %bb.1(0x40000000), %bb.2(0x40000000) + successors: %bb.1, %bb.2 liveins: $w0, $lr early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) @@ -197,7 +283,8 @@ body: | ; COM: Check that branch relaxation doesn't insert a trampoline if there is no ; COM: viable insertion location. ; CHECK: bb.0 (%ir-block.1): - ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: successors: %bb.1 + ; CHECK-SAME: , %bb.3 ; CHECK: CBNZW ; CHECK-SAME: %bb.1 ; CHECK-NEXT: B @@ -207,7 +294,7 @@ body: | ; CHECK: bb.2.cold (bbsections Cold): ; CHECK: TCRETURNdi bb.0 (%ir-block.1): - successors: %bb.1(0x40000000), %bb.2(0x40000000) + successors: %bb.1, %bb.2 liveins: $w0, $lr early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) @@ -241,7 +328,8 @@ body: | ; COM: Check that relaxation of conditional branches from the Cold section to ; COM: the Hot section doesn't modify the Hot section. ; CHECK: bb.0 (%ir-block.1, bbsections Cold): - ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: successors: %bb.1 + ; CHECK-SAME: , %bb.2 ; CHECK: CBNZW ; CHECK-SAME: %bb.1 ; CHECK-NEXT: B %bb.2 @@ -250,7 +338,7 @@ body: | ; CHECK: bb.2.hot_block: ; CHECK: TCRETURNdi bb.0 (%ir-block.1, bbsections Cold): - successors: %bb.1(0x40000000), %bb.2(0x40000000) + successors: %bb.1, %bb.2 liveins: $w0, $lr early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) @@ -284,27 +372,29 @@ body: | ; COM: Check that a conditional branch to a trampoline is properly relaxed ; COM: if the trampoline is pushed out of range. 
; INDIRECT: bb.0.entry: - ; INDIRECT-NEXT: successors: %bb.1(0x40000000), %[[TRAMP1:bb.[0-9]+]](0x40000000) + ; INDIRECT-NEXT: successors: %bb.1 + ; INDIRECT-SAME: , %[[TRAMP1:bb.[0-9]+]] ; INDIRECT: TBNZW ; INDIRECT-SAME: %bb.1 ; INDIRECT-NEXT: B{{ }} ; INDIRECT-SAME: %[[TRAMP1]] ; INDIRECT: bb.1.unrelaxable: - ; INDIRECT-NEXT: successors: %bb.2(0x40000000), %[[TRAMP2:bb.[0-9]+]](0x40000000) + ; INDIRECT-NEXT: successors: %bb.2 + ; INDIRECT-SAME: , %[[TRAMP2:bb.[0-9]+]] ; INDIRECT: TBNZW ; INDIRECT-SAME: %bb.2 ; INDIRECT: [[TRAMP2]] - ; INDIRECT-NEXT: successors: %bb.3(0x80000000) + ; INDIRECT-NEXT: successors: %bb.3 ; INDIRECT: bb.2.end: ; INDIRECT: TCRETURNdi ; INDIRECT: [[TRAMP1]].entry: - ; INDIRECT: successors: %bb.3(0x80000000) + ; INDIRECT: successors: %bb.3 ; INDIRECT-NOT: bbsections Cold ; INDIRECT: bb.3.cold (bbsections Cold): ; INDIRECT: TCRETURNdi bb.0.entry (%ir-block.entry): - successors: %bb.1(0x40000000), %bb.3(0x40000000) + successors: %bb.1, %bb.3 liveins: $w0, $w1, $lr early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) @@ -312,7 +402,7 @@ body: | TBZW killed renamable $w0, 0, %bb.3 bb.1.unrelaxable: - successors: %bb.2(0x40000000), %bb.3(0x40000000) + successors: %bb.2, %bb.3 liveins: $w1, $x16 TBNZW killed renamable $w1, 0, %bb.2 @@ -335,3 +425,255 @@ body: | TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp ... + +name: x16_used_cold_to_hot +tracksRegLiveness: true +liveins: [] +machineFunctionInfo: + hasRedZone: false +body: | + ; INDIRECT-LABEL: name: x16_used_cold_to_hot + ; COM: Pre-commit to record the behavior of relaxing a "cold-to-hot" + ; COM: unconditional branch across which x16 is taken but there is + ; COM: still a free register. + ; INDIRECT: bb.0.entry: + ; INDIRECT-NEXT: successors: %bb.1 + ; INDIRECT-SAME: , %bb.3 + ; INDIRECT: TBZW killed renamable $w8, 0, %bb.1 + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: bb.3.entry: + ; INDIRECT-NEXT: successors: %bb.2 + ; INDIRECT-NEXT: liveins: $x16 + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: $[[SCAVENGED_REGISTER3:x[0-9]+]] = ADRP target-flags(aarch64-page) + ; INDIRECT-NEXT: $[[SCAVENGED_REGISTER3]] = ADDXri $[[SCAVENGED_REGISTER3]], target-flags(aarch64-pageoff, aarch64-nc) , 0 + ; INDIRECT-NEXT: BR $[[SCAVENGED_REGISTER3]] + ; INDIRECT: bb.1.hot: + ; INDIRECT-NEXT: liveins: $x16 + ; INDIRECT: killed $x16 + ; INDIRECT: RET undef $lr + ; INDIRECT: bb.2.cold (bbsections Cold): + ; INDIRECT-NEXT: successors: %bb.5 + ; INDIRECT-NEXT: liveins: $x16 + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: INLINEASM &".space 4", 1 /* sideeffect attdialect */ + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: bb.5.cold (bbsections Cold): + ; INDIRECT-NEXT: successors: %bb.1 + ; INDIRECT-NEXT: liveins: $x16 + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: $[[SCAVENGED_REGISTER4:x[0-9]+]] = ADRP target-flags(aarch64-page) + ; INDIRECT-NEXT: $[[SCAVENGED_REGISTER4]] = ADDXri $[[SCAVENGED_REGISTER4]], target-flags(aarch64-pageoff, aarch64-nc) , 0 + ; INDIRECT-NEXT: BR $[[SCAVENGED_REGISTER4]] + + bb.0.entry: + successors: %bb.1, %bb.2 + + $sp = frame-setup SUBXri $sp, 16, 0 + INLINEASM &"mov x16, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x16 + dead renamable $x8 = SUBSXri $x16, 0, 0, implicit-def $nzcv + renamable $w8 = CSINCWr $wzr, $wzr, 1, implicit killed $nzcv + TBZW killed renamable $w8, 0, %bb.1 + + B %bb.2 + + bb.1.hot: + liveins: $x16 + + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x16 + $sp = frame-destroy ADDXri $sp, 16, 0 + RET 
undef $lr + + bb.2.cold (bbsections Cold): + successors: %bb.1 + liveins: $x16 + + INLINEASM &".space 4", 1 /* sideeffect attdialect */ + B %bb.1 +... +--- +name: all_used_cold_to_hot +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x19', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x20', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x21', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -32, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x22', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 4, name: '', type: spill-slot, offset: -40, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x23', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 5, name: '', type: spill-slot, offset: -48, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x24', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, name: '', type: spill-slot, offset: -56, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x25', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, name: '', type: spill-slot, offset: -64, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x26', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 8, name: '', type: spill-slot, offset: -72, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x27', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 9, name: '', type: spill-slot, offset: -80, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x28', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 10, name: '', type: spill-slot, offset: -96, size: 8, alignment: 16, + stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +machineFunctionInfo: + hasRedZone: false +body: | + ; INDIRECT-LABEL: name: all_used_cold_to_hot + ; COM: Pre-commit to record the behavior of relaxing a "cold-to-hot" + ; COM: unconditional branch across which there are no free registers. 
+ ; INDIRECT: bb.0.entry: + ; INDIRECT-NEXT: successors: %bb.3 + ; INDIRECT-NEXT: liveins: $fp, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20 + ; INDIRECT-COUNT-29: INLINEASM &"mov + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT: bb.3.entry: + ; INDIRECT-NEXT: successors: %bb.2 + ; INDIRECT-NEXT: liveins: $fp, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, + ; INDIRECT-SAME: $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, + ; INDIRECT-SAME: $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28 + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: $[[SCAVENGED_REGISTER5:x[0-9]+]] = ADRP target-flags(aarch64-page) + ; INDIRECT-NEXT: $[[SCAVENGED_REGISTER5]] = ADDXri $[[SCAVENGED_REGISTER5]], target-flags(aarch64-pageoff, aarch64-nc) , 0 + ; INDIRECT-NEXT: BR $[[SCAVENGED_REGISTER5]] + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: bb.6.exit: + ; INDIRECT-NEXT: successors: %bb.1 + ; INDIRECT-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, + ; INDIRECT-SAME: $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, + ; INDIRECT-SAME: $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28 + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: early-clobber $sp, $[[SPILL_REGISTER:x[0-9]+]] = LDRXpost $sp, 16 + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: bb.1.exit: + ; INDIRECT-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, + ; INDIRECT-SAME: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, + ; INDIRECT-SAME: $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-COUNT-30: INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed + ; INDIRECT: RET undef $lr + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: bb.2.cold (bbsections Cold): + ; INDIRECT-NEXT: successors: %bb.5 + ; INDIRECT-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, + ; INDIRECT-SAME: $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, + ; INDIRECT-SAME: $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: INLINEASM &"mov x16, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x16 + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: bb.5.cold (bbsections Cold): + ; INDIRECT-NEXT: successors: %bb.6 + ; INDIRECT-NEXT: liveins: $fp, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, + ; INDIRECT-SAME: $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, + ; INDIRECT-SAME: $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28 + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: early-clobber $sp = STRXpre $[[SPILL_REGISTER]], $sp, -16 + ; INDIRECT-NEXT: $[[SPILL_REGISTER]] = ADRP target-flags(aarch64-page) + ; INDIRECT-NEXT: $[[SPILL_REGISTER]] = ADDXri $[[SPILL_REGISTER]], target-flags(aarch64-pageoff, aarch64-nc) , 0 + ; INDIRECT-NEXT: BR $[[SPILL_REGISTER]] + + bb.0.entry: + successors: %bb.2 + liveins: $fp, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20 + + $sp = frame-setup SUBXri $sp, 112, 0 + frame-setup STRXui killed $fp, $sp, 2 :: (store (s64) into %stack.10) + frame-setup STPXi killed $x28, killed $x27, $sp, 4 :: (store (s64) into %stack.9), (store (s64) into %stack.8) + frame-setup STPXi killed $x26, killed $x25, $sp, 6 :: (store (s64) into %stack.7), (store (s64) into %stack.6) + frame-setup STPXi killed $x24, killed $x23, $sp, 8 :: (store (s64) into %stack.5), (store (s64) into %stack.4) + frame-setup STPXi killed $x22, killed $x21, $sp, 10 :: (store (s64) into %stack.3), (store (s64) into %stack.2) + frame-setup STPXi killed $x20, killed $x19, $sp, 12 
:: (store (s64) into %stack.1), (store (s64) into %stack.0) + INLINEASM &"mov x0, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x0 + INLINEASM &"mov x1, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x1 + INLINEASM &"mov x2, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x2 + INLINEASM &"mov x3, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x3 + INLINEASM &"mov x4, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x4 + INLINEASM &"mov x5, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x5 + INLINEASM &"mov x6, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x6 + INLINEASM &"mov x7, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x7 + INLINEASM &"mov x8, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x8 + INLINEASM &"mov x9, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x9 + INLINEASM &"mov x10, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x10 + INLINEASM &"mov x11, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x11 + INLINEASM &"mov x12, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x12 + INLINEASM &"mov x13, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x13 + INLINEASM &"mov x14, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x14 + INLINEASM &"mov x15, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x15 + INLINEASM &"mov x17, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x17 + INLINEASM &"mov x18, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x18 + INLINEASM &"mov x19, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x19 + INLINEASM &"mov x20, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x20 + INLINEASM &"mov x21, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x21 + INLINEASM &"mov x22, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x22 + INLINEASM &"mov x23, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x23 + INLINEASM &"mov x24, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x24 + INLINEASM &"mov x25, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x25 + INLINEASM &"mov x26, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x26 + INLINEASM &"mov x27, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x27 + INLINEASM &"mov x28, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x28 + INLINEASM &"mov fp, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $fp + B %bb.2 + + + bb.1.exit: + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x0 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x1 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x2 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x3 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x4 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x5 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x6 + 
INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x7 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x8 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x9 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x10 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x11 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x12 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x13 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x14 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x15 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x16 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x17 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x18 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x19 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x20 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x21 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x22 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x23 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x24 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x25 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x26 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x27 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x28 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $fp + $x20, $x19 = frame-destroy LDPXi $sp, 12 :: (load (s64) from %stack.1), (load (s64) from %stack.0) + $x22, $x21 = frame-destroy LDPXi $sp, 10 :: (load (s64) from %stack.3), (load (s64) from %stack.2) + $x24, $x23 = frame-destroy LDPXi $sp, 8 :: (load (s64) from %stack.5), (load (s64) from %stack.4) + $x26, $x25 = frame-destroy LDPXi $sp, 6 :: (load (s64) from %stack.7), (load (s64) from %stack.6) + $x28, $x27 = frame-destroy LDPXi $sp, 4 :: (load (s64) from %stack.9), (load (s64) from %stack.8) + $fp = frame-destroy LDRXui $sp, 2 :: (load (s64) from %stack.10) + $sp = frame-destroy ADDXri $sp, 112, 0 + RET undef $lr + + bb.2.cold (bbsections Cold): + successors: %bb.1 + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + + INLINEASM &"mov x16, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x16 + B %bb.1 + +... -- Gitee From 79c53ad2ee078aa659579847e80690bf18f717d9 Mon Sep 17 00:00:00 2001 From: Daniel Hoekwater Date: Sat, 29 Jul 2023 00:01:37 +0000 Subject: [PATCH 3/3] [AArch64] [BranchRelaxation] Optimize for hot code size in AArch64 branch relaxation On AArch64, it is safe to let the linker handle relaxation of unconditional branches; in most cases, the destination is within range, and the linker doesn't need to do anything. If the linker does insert fixup code, it clobbers the x16 inter-procedural register, so x16 must be available across the branch before linking. 
If x16 isn't available, but some other register is, we can relax the branch either by spilling x16 OR using the free register for a manually-inserted indirect branch. This patch builds on D145211. While that patch is for correctness, this one is for performance of the common case. As noted in https://reviews.llvm.org/D145211#4537173, we can trust the linker to relax cross-section unconditional branches across which x16 is available. Programs that use machine function splitting care most about the performance of hot code at the expense of the performance of cold code, so we prioritize minimizing hot code size. Here's a breakdown of the cases: Hot -> Cold [x16 is free across the branch] Do nothing; let the linker relax the branch. Cold -> Hot [x16 is free across the branch] Do nothing; let the linker relax the branch. Hot -> Cold [x16 used across the branch, but there is a free register] Spill x16; let the linker relax the branch. Spilling requires fewer instructions than manually inserting an indirect branch. Cold -> Hot [x16 used across the branch, but there is a free register] Manually insert an indirect branch. Spilling would require adding a restore block in the hot section. Hot -> Cold [No free regs] Spill x16; let the linker relax the branch. Cold -> Hot [No free regs] Spill x16 and put the restore block at the end of the hot function; let the linker relax the branch. Ex: [Hot section] func.hot: ... hot code... func.restore: ... restore x16 ... B func.hot [Cold section] func.cold: ... spill x16 ... B func.restore Putting the restore block at the end of the function instead of just before the destination increases the cost of executing the store, but it avoids putting cold code in the middle of hot code. Since the restore is very rarely taken, this is a worthwhile tradeoff. Differential Revision: https://reviews.llvm.org/D156767 --- llvm/lib/CodeGen/BranchRelaxation.cpp | 37 +++++++++- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 28 +++++--- llvm/test/CodeGen/AArch64/branch-relax-b.ll | 27 +++---- .../AArch64/branch-relax-cross-section.mir | 71 +++++++++++-------- 4 files changed, 108 insertions(+), 55 deletions(-) diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp index 2995732e0aa8..f50eb5e1730a 100644 --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -83,6 +83,8 @@ class BranchRelaxation : public MachineFunctionPass { // The basic block after which trampolines are inserted. This is the last // basic block that isn't in the cold section. MachineBasicBlock *TrampolineInsertionPoint = nullptr; + SmallDenseSet> + RelaxedUnconditionals; std::unique_ptr RS; LivePhysRegs LiveRegs; @@ -148,7 +150,8 @@ void BranchRelaxation::verify() { if (MI.getOpcode() == TargetOpcode::FAULTING_OP) continue; MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI); - assert(isBlockInRange(MI, *DestBB)); + assert(isBlockInRange(MI, *DestBB) || + RelaxedUnconditionals.contains({&MBB, DestBB})); } } #endif @@ -170,7 +173,9 @@ LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() { void BranchRelaxation::scanFunction() { BlockInfo.clear(); BlockInfo.resize(MF->getNumBlockIDs()); + TrampolineInsertionPoint = nullptr; + RelaxedUnconditionals.clear(); // First thing, compute the size of all basic blocks, and see if the function // has any inline assembly in it. 
If so, we have to be conservative about @@ -562,6 +567,8 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) { BranchBB->sortUniqueLiveIns(); BranchBB->addSuccessor(DestBB); MBB->replaceSuccessor(DestBB, BranchBB); + if (TrampolineInsertionPoint == MBB) + TrampolineInsertionPoint = BranchBB; } DebugLoc DL = MI.getDebugLoc(); @@ -585,8 +592,28 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) { BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB); adjustBlockOffsets(*MBB); - // If RestoreBB is required, try to place just before DestBB. + // If RestoreBB is required, place it appropriately. if (!RestoreBB->empty()) { + // If the jump is Cold -> Hot, don't place the restore block (which is + // cold) in the middle of the function. Place it at the end. + if (MBB->getSectionID() == MBBSectionID::ColdSectionID && + DestBB->getSectionID() != MBBSectionID::ColdSectionID) { + MachineBasicBlock *NewBB = createNewBlockAfter(*TrampolineInsertionPoint); + TII->insertUnconditionalBranch(*NewBB, DestBB, DebugLoc()); + BlockInfo[NewBB->getNumber()].Size = computeBlockSize(*NewBB); + + // New trampolines should be inserted after NewBB. + TrampolineInsertionPoint = NewBB; + + // Retarget the unconditional branch to the trampoline block. + BranchBB->replaceSuccessor(DestBB, NewBB); + NewBB->addSuccessor(DestBB); + + DestBB = NewBB; + } + + // In all other cases, try to place just before DestBB. + // TODO: For multiple far branches to the same destination, there are // chances that some restore blocks could be shared if they clobber the // same registers and share the same restore sequence. So far, those @@ -616,9 +643,11 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) { RestoreBB->setSectionID(DestBB->getSectionID()); RestoreBB->setIsBeginSection(DestBB->isBeginSection()); DestBB->setIsBeginSection(false); + RelaxedUnconditionals.insert({BranchBB, RestoreBB}); } else { // Remove restore block if it's not required. MF->erase(RestoreBB); + RelaxedUnconditionals.insert({BranchBB, DestBB}); } return true; @@ -644,7 +673,8 @@ bool BranchRelaxation::relaxBranchInstructions() { // Unconditional branch destination might be unanalyzable, assume these // are OK. if (MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last)) { - if (!isBlockInRange(*Last, *DestBB) && !TII->isTailCall(*Last)) { + if (!isBlockInRange(*Last, *DestBB) && !TII->isTailCall(*Last) && + !RelaxedUnconditionals.contains({&MBB, DestBB})) { fixupUnconditionalBranch(*Last); ++NumUnconditionalRelaxed; Changed = true; @@ -724,6 +754,7 @@ bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) { LLVM_DEBUG(dbgs() << " Basic blocks after relaxation\n\n"; dumpBBs()); BlockInfo.clear(); + RelaxedUnconditionals.clear(); return MadeChange; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index e84b61da04a7..6d38898b3f54 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -270,30 +270,40 @@ void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, }; RS->enterBasicBlockEnd(MBB); - Register Reg = RS->FindUnusedReg(&AArch64::GPR64RegClass); - - // If there's a free register, manually insert the indirect branch using it. - if (Reg != AArch64::NoRegister) { - buildIndirectBranch(Reg, NewDestBB); + // If X16 is unused, we can rely on the linker to insert a range extension + // thunk if NewDestBB is out of range of a single B instruction. 
+ constexpr Register Reg = AArch64::X16; + if (!RS->isRegUsed(Reg)) { + insertUnconditionalBranch(MBB, &NewDestBB, DL); RS->setRegUsed(Reg); return; } - // Otherwise, spill and use X16. This briefly moves the stack pointer, making - // it incompatible with red zones. + // If there's a free register and it's worth inflating the code size, + // manually insert the indirect branch. + Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass); + if (Scavenged != AArch64::NoRegister && + MBB.getSectionID() == MBBSectionID::ColdSectionID) { + buildIndirectBranch(Scavenged, NewDestBB); + RS->setRegUsed(Scavenged); + return; + } + + // Note: Spilling X16 briefly moves the stack pointer, making it incompatible + // with red zones. AArch64FunctionInfo *AFI = MBB.getParent()->getInfo(); if (!AFI || AFI->hasRedZone().value_or(true)) report_fatal_error( "Unable to insert indirect branch inside function that has red zone"); - Reg = AArch64::X16; + // Otherwise, spill X16 and defer range extension to the linker. BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre)) .addReg(AArch64::SP, RegState::Define) .addReg(Reg) .addReg(AArch64::SP) .addImm(-16); - buildIndirectBranch(Reg, RestoreBB); + BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB); BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost)) .addReg(AArch64::SP, RegState::Define) diff --git a/llvm/test/CodeGen/AArch64/branch-relax-b.ll b/llvm/test/CodeGen/AArch64/branch-relax-b.ll index 3f1501c01451..44b730f2207f 100644 --- a/llvm/test/CodeGen/AArch64/branch-relax-b.ll +++ b/llvm/test/CodeGen/AArch64/branch-relax-b.ll @@ -6,9 +6,7 @@ define void @relax_b_nospill(i1 zeroext %0) { ; CHECK-NEXT: tbnz w0, ; CHECK-SAME: LBB0_1 ; CHECK-NEXT: // %bb.3: // %entry -; CHECK-NEXT: adrp [[SCAVENGED_REGISTER:x[0-9]+]], .LBB0_2 -; CHECK-NEXT: add [[SCAVENGED_REGISTER]], [[SCAVENGED_REGISTER]], :lo12:.LBB0_2 -; CHECK-NEXT: br [[SCAVENGED_REGISTER]] +; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_1: // %iftrue ; CHECK-NEXT: //APP ; CHECK-NEXT: .zero 2048 @@ -44,9 +42,7 @@ define void @relax_b_spill() { ; CHECK-NEXT: // %bb.4: // %entry ; CHECK-NEXT: str [[SPILL_REGISTER:x[0-9]+]], [sp, ; CHECK-SAME: -16]! -; CHECK-NEXT: adrp [[SPILL_REGISTER]], .LBB1_5 -; CHECK-NEXT: add [[SPILL_REGISTER]], [[SPILL_REGISTER]], :lo12:.LBB1_5 -; CHECK-NEXT: br [[SPILL_REGISTER]] +; CHECK-NEXT: b .LBB1_5 ; CHECK-NEXT: .LBB1_1: // %iftrue ; CHECK-NEXT: //APP ; CHECK-NEXT: .zero 2048 @@ -137,23 +133,28 @@ iffalse: define void @relax_b_x16_taken() { ; CHECK-LABEL: relax_b_x16_taken: // @relax_b_x16_taken -; COM: Pre-commit to record the behavior of relaxing an unconditional -; COM: branch across which x16 is taken. +; COM: Since the source of the out-of-range branch is hot and x16 is +; COM: taken, it makes sense to spill x16 and let the linker insert +; COM: fixup code for this branch rather than inflating the hot code +; COM: size by eagerly relaxing the unconditional branch. ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: //APP ; CHECK-NEXT: mov x16, #1 ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: cbnz x16, .LBB2_1 ; CHECK-NEXT: // %bb.3: // %entry -; CHECK-NEXT: adrp [[SCAVENGED_REGISTER2:x[0-9]+]], .LBB2_2 -; CHECK-NEXT: add [[SCAVENGED_REGISTER2]], [[SCAVENGED_REGISTER2]], :lo12:.LBB2_2 -; CHECK-NEXT: br [[SCAVENGED_REGISTER2]] +; CHECK-NEXT: str [[SPILL_REGISTER]], [sp, +; CHECK-SAME: -16]! 
+; CHECK-NEXT: b .LBB2_4 ; CHECK-NEXT: .LBB2_1: // %iftrue ; CHECK-NEXT: //APP ; CHECK-NEXT: .zero 2048 ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB2_2: // %iffalse +; CHECK-NEXT: .LBB2_4: // %iffalse +; CHECK-NEXT: ldr [[SPILL_REGISTER]], [sp], +; CHECK-SAME: 16 +; CHECK-NEXT: // %bb.2: // %iffalse ; CHECK-NEXT: //APP ; CHECK-NEXT: // reg use x16 ; CHECK-NEXT: //NO_APP @@ -174,4 +175,4 @@ iffalse: } declare i32 @bar() -declare i32 @baz() \ No newline at end of file +declare i32 @baz() diff --git a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir index e7a56d4c7a4d..f8f0b76f1c9f 100644 --- a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir +++ b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir @@ -384,12 +384,19 @@ body: | ; INDIRECT: TBNZW ; INDIRECT-SAME: %bb.2 ; INDIRECT: [[TRAMP2]] - ; INDIRECT-NEXT: successors: %bb.3 + ; INDIRECT-NEXT: successors: %bb.6 ; INDIRECT: bb.2.end: ; INDIRECT: TCRETURNdi ; INDIRECT: [[TRAMP1]].entry: - ; INDIRECT: successors: %bb.3 - ; INDIRECT-NOT: bbsections Cold + ; INDIRECT-NEXT: successors: %[[TRAMP1_SPILL:bb.[0-9]+]] + ; INDIRECT: [[TRAMP1_SPILL]].entry: + ; INDIRECT-NEXT: successors: %[[TRAMP1_RESTORE:bb.[0-9]+]] + ; INDIRECT: early-clobber $sp = STRXpre $[[SPILL_REGISTER:x[0-9]+]], $sp, -16 + ; INDIRECT-NEXT: B %[[TRAMP1_RESTORE:bb.[0-9]+]] + ; INDIRECT: [[TRAMP1_RESTORE]].cold (bbsections Cold): + ; INDIRECT-NEXT: successors: %bb.3 + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: early-clobber $sp, $[[SPILL_REGISTER]] = LDRXpost $sp, 16 ; INDIRECT: bb.3.cold (bbsections Cold): ; INDIRECT: TCRETURNdi @@ -433,26 +440,30 @@ machineFunctionInfo: hasRedZone: false body: | ; INDIRECT-LABEL: name: x16_used_cold_to_hot - ; COM: Pre-commit to record the behavior of relaxing a "cold-to-hot" - ; COM: unconditional branch across which x16 is taken but there is - ; COM: still a free register. + ; COM: Check that unconditional branches from the cold section to + ; COM: the hot section manually insert indirect branches if x16 + ; COM: isn't available but there is still a free register. 
; INDIRECT: bb.0.entry: ; INDIRECT-NEXT: successors: %bb.1 ; INDIRECT-SAME: , %bb.3 ; INDIRECT: TBZW killed renamable $w8, 0, %bb.1 ; INDIRECT-NEXT: {{ $}} ; INDIRECT-NEXT: bb.3.entry: - ; INDIRECT-NEXT: successors: %bb.2 + ; INDIRECT-NEXT: successors: %bb.4 ; INDIRECT-NEXT: liveins: $x16 ; INDIRECT-NEXT: {{ $}} - ; INDIRECT-NEXT: $[[SCAVENGED_REGISTER3:x[0-9]+]] = ADRP target-flags(aarch64-page) - ; INDIRECT-NEXT: $[[SCAVENGED_REGISTER3]] = ADDXri $[[SCAVENGED_REGISTER3]], target-flags(aarch64-pageoff, aarch64-nc) , 0 - ; INDIRECT-NEXT: BR $[[SCAVENGED_REGISTER3]] + ; INDIRECT-NEXT: early-clobber $sp = STRXpre $[[SPILL_REGISTER]], $sp, -16 + ; INDIRECT-NEXT: B %bb.4 ; INDIRECT: bb.1.hot: ; INDIRECT-NEXT: liveins: $x16 ; INDIRECT: killed $x16 ; INDIRECT: RET undef $lr - ; INDIRECT: bb.2.cold (bbsections Cold): + ; INDIRECT: bb.4.cold (bbsections Cold): + ; INDIRECT-NEXT: successors: %bb.2 + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: early-clobber $sp, $[[SPILL_REGISTER]] = LDRXpost $sp, 16 + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: bb.2.cold (bbsections Cold): ; INDIRECT-NEXT: successors: %bb.5 ; INDIRECT-NEXT: liveins: $x16 ; INDIRECT-NEXT: {{ $}} @@ -462,9 +473,9 @@ body: | ; INDIRECT-NEXT: successors: %bb.1 ; INDIRECT-NEXT: liveins: $x16 ; INDIRECT-NEXT: {{ $}} - ; INDIRECT-NEXT: $[[SCAVENGED_REGISTER4:x[0-9]+]] = ADRP target-flags(aarch64-page) - ; INDIRECT-NEXT: $[[SCAVENGED_REGISTER4]] = ADDXri $[[SCAVENGED_REGISTER4]], target-flags(aarch64-pageoff, aarch64-nc) , 0 - ; INDIRECT-NEXT: BR $[[SCAVENGED_REGISTER4]] + ; INDIRECT-NEXT: $[[SCAVENGED_REGISTER:x[0-9]+]] = ADRP target-flags(aarch64-page) + ; INDIRECT-NEXT: $[[SCAVENGED_REGISTER]] = ADDXri $[[SCAVENGED_REGISTER]], target-flags(aarch64-pageoff, aarch64-nc) , 0 + ; INDIRECT-NEXT: BR $[[SCAVENGED_REGISTER]] bb.0.entry: successors: %bb.1, %bb.2 @@ -532,8 +543,10 @@ machineFunctionInfo: hasRedZone: false body: | ; INDIRECT-LABEL: name: all_used_cold_to_hot - ; COM: Pre-commit to record the behavior of relaxing a "cold-to-hot" - ; COM: unconditional branch across which there are no free registers. + ; COM: Check that unconditional branches from the cold section to + ; COM: the hot section spill x16 and defer indirect branch + ; COM: insertion to the linker if there are no free general-purpose + ; COM: registers. 
; INDIRECT: bb.0.entry: ; INDIRECT-NEXT: successors: %bb.3 ; INDIRECT-NEXT: liveins: $fp, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20 @@ -545,17 +558,7 @@ body: | ; INDIRECT-SAME: $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, ; INDIRECT-SAME: $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28 ; INDIRECT-NEXT: {{ $}} - ; INDIRECT-NEXT: $[[SCAVENGED_REGISTER5:x[0-9]+]] = ADRP target-flags(aarch64-page) - ; INDIRECT-NEXT: $[[SCAVENGED_REGISTER5]] = ADDXri $[[SCAVENGED_REGISTER5]], target-flags(aarch64-pageoff, aarch64-nc) , 0 - ; INDIRECT-NEXT: BR $[[SCAVENGED_REGISTER5]] - ; INDIRECT-NEXT: {{ $}} - ; INDIRECT-NEXT: bb.6.exit: - ; INDIRECT-NEXT: successors: %bb.1 - ; INDIRECT-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, - ; INDIRECT-SAME: $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, - ; INDIRECT-SAME: $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28 - ; INDIRECT-NEXT: {{ $}} - ; INDIRECT-NEXT: early-clobber $sp, $[[SPILL_REGISTER:x[0-9]+]] = LDRXpost $sp, 16 + ; INDIRECT-NEXT: B %bb.2 ; INDIRECT-NEXT: {{ $}} ; INDIRECT-NEXT: bb.1.exit: ; INDIRECT-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, @@ -565,6 +568,16 @@ body: | ; INDIRECT-COUNT-30: INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed ; INDIRECT: RET undef $lr ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: bb.6.exit: + ; INDIRECT-NEXT: successors: %bb.7(0x80000000) + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: early-clobber $sp, $[[SPILL_REGISTER]] = LDRXpost $sp, 16 + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: bb.7.exit: + ; INDIRECT-NEXT: successors: %bb.1(0x80000000) + ; INDIRECT-NEXT: {{ $}} + ; INDIRECT-NEXT: B %bb.1 + ; INDIRECT-NEXT: {{ $}} ; INDIRECT-NEXT: bb.2.cold (bbsections Cold): ; INDIRECT-NEXT: successors: %bb.5 ; INDIRECT-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, @@ -580,9 +593,7 @@ body: | ; INDIRECT-SAME: $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28 ; INDIRECT-NEXT: {{ $}} ; INDIRECT-NEXT: early-clobber $sp = STRXpre $[[SPILL_REGISTER]], $sp, -16 - ; INDIRECT-NEXT: $[[SPILL_REGISTER]] = ADRP target-flags(aarch64-page) - ; INDIRECT-NEXT: $[[SPILL_REGISTER]] = ADDXri $[[SPILL_REGISTER]], target-flags(aarch64-pageoff, aarch64-nc) , 0 - ; INDIRECT-NEXT: BR $[[SPILL_REGISTER]] + ; INDIRECT-NEXT: B %bb.6 bb.0.entry: successors: %bb.2 -- Gitee