From bbb09c27f85e12eefae12a04c12887f435826909 Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Wed, 25 Jan 2023 22:26:47 -0800 Subject: [PATCH 1/2] [Backport][Pseudo Probe] Do not instrument EH blocks. This change avoids inserting probes to EH blocks. Pseudo probe can prevent block merging when probes in the blocks look different. This has a chained effect to passes incurring exponential IR growth (such as jump threading) and as a consequence the compilation may time out. Not inserting probes to EH blocks could mitigate the issue. Another benefit is that both IR size and binary size are smaller. Since EH blocks are usually cold, the change should have minimal impact to profile quality. Testing: Out of two internal large benchmarks, no perf impact seen. 1% size savings to both the `text` and the `pseudo_probe` section. Reviewed By: wenlei Differential Revision: https://reviews.llvm.org/D142747 --- llvm/include/llvm/Analysis/EHUtils.h | 90 +++++++++++++++++++ llvm/include/llvm/CodeGen/MachineBasicBlock.h | 6 ++ llvm/include/llvm/CodeGen/MachineSSAContext.h | 4 - llvm/lib/CodeGen/MachineFunctionSplitter.cpp | 79 ++-------------- .../lib/Transforms/IPO/SampleProfileProbe.cpp | 11 ++- .../SampleProfile/pseudo-probe-eh.ll | 43 +++++++++ 6 files changed, 157 insertions(+), 76 deletions(-) create mode 100644 llvm/include/llvm/Analysis/EHUtils.h create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll diff --git a/llvm/include/llvm/Analysis/EHUtils.h b/llvm/include/llvm/Analysis/EHUtils.h new file mode 100644 index 000000000000..728ab53c89bc --- /dev/null +++ b/llvm/include/llvm/Analysis/EHUtils.h @@ -0,0 +1,90 @@ +//===-- Analysis/EHUtils.h - Exception handling related utils --*-//C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// + +#ifndef LLVM_ANALYSIS_EHUTILS_H +#define LLVM_ANALYSIS_EHUTILS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" + +namespace llvm { + +/// Compute a list of blocks that are only reachable via EH paths. +template +static void computeEHOnlyBlocks(FunctionT &F, DenseSet &EHBlocks) { + // A block can be unknown if its not reachable from anywhere + // EH if its only reachable from start blocks via some path through EH pads + // NonEH if it's reachable from Non EH blocks as well. + enum Status { Unknown = 0, EH = 1, NonEH = 2 }; + DenseSet WorkList; + DenseMap Statuses; + + auto GetStatus = [&](BlockT *BB) { + if (Statuses.find(BB) != Statuses.end()) + return Statuses[BB]; + else + return Unknown; + }; + + auto CheckPredecessors = [&](BlockT *BB, Status Stat) { + for (auto *PredBB : predecessors(BB)) { + Status PredStatus = GetStatus(PredBB); + // If status of predecessor block has gone above current block + // we update current blocks status. + if (PredStatus > Stat) + Stat = PredStatus; + } + return Stat; + }; + + auto AddSuccesors = [&](BlockT *BB) { + for (auto *SuccBB : successors(BB)) { + if (!SuccBB->isEHPad()) + WorkList.insert(SuccBB); + } + }; + + // Insert the successors of start block and landing pads successor. + BlockT *StartBlock = &F.front(); + Statuses[StartBlock] = NonEH; + AddSuccesors(StartBlock); + + for (auto &BB : F) { + if (BB.isEHPad()) { + AddSuccesors(&BB); + Statuses[&BB] = EH; + } + } + + // Worklist iterative algorithm. + while (!WorkList.empty()) { + auto *BB = *WorkList.begin(); + WorkList.erase(BB); + + Status OldStatus = GetStatus(BB); + + // Check on predecessors and check for + // Status update. + Status NewStatus = CheckPredecessors(BB, OldStatus); + + // Did the block status change? 
+ bool Changed = OldStatus != NewStatus; + if (Changed) { + AddSuccesors(BB); + Statuses[BB] = NewStatus; + } + } + + EHBlocks.clear(); + for (auto Entry : Statuses) { + if (Entry.second == EH) + EHBlocks.insert(Entry.first); + } +} +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index af8db60b1e8e..7f5c2ca92ec3 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -1276,6 +1276,12 @@ template <> struct GraphTraits> { static ChildIteratorType child_end(NodeRef N) { return N->pred_end(); } }; +// These accessors are handy for sharing templated code between IR and MIR. +inline auto successors(const MachineBasicBlock *BB) { return BB->successors(); } +inline auto predecessors(const MachineBasicBlock *BB) { + return BB->predecessors(); +} + /// MachineInstrSpan provides an interface to get an iteration range /// containing the instruction it was initialized with, along with all /// those instructions inserted prior to or following that instruction diff --git a/llvm/include/llvm/CodeGen/MachineSSAContext.h b/llvm/include/llvm/CodeGen/MachineSSAContext.h index e3b2dc459881..31a192cd8d29 100644 --- a/llvm/include/llvm/CodeGen/MachineSSAContext.h +++ b/llvm/include/llvm/CodeGen/MachineSSAContext.h @@ -26,10 +26,6 @@ class Register; template class GenericSSAContext; template class DominatorTreeBase; -inline auto successors(const MachineBasicBlock *BB) { return BB->successors(); } -inline auto predecessors(const MachineBasicBlock *BB) { - return BB->predecessors(); -} inline unsigned succ_size(const MachineBasicBlock *BB) { return BB->succ_size(); } diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp index 31c546fe0771..5a46cb0b16cb 100644 --- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp +++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -24,6 +24,7 @@ 
//===----------------------------------------------------------------------===// #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/EHUtils.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -84,75 +85,13 @@ public: } // end anonymous namespace /// setDescendantEHBlocksCold - This splits all EH pads and blocks reachable -/// only by EH pad as cold. This will help mark EH pads statically cold instead -/// of relying on profile data. -static void -setDescendantEHBlocksCold(SmallVectorImpl &EHBlocks, - MachineFunction &MF) { - MachineBasicBlock *StartBlock = &MF.front(); - // A block can be unknown if its not reachable from anywhere - // EH if its only reachable from start blocks via some path through EH pads - // NonEH if it's reachable from Non EH blocks as well. - enum Status { Unknown = 0, EH = 1, NonEH = 2 }; - DenseSet WorkList; - DenseMap Statuses; - - auto getStatus = [&](MachineBasicBlock *MBB) { - if (Statuses.find(MBB) != Statuses.end()) - return Statuses[MBB]; - else - return Unknown; - }; - - auto checkPredecessors = [&](MachineBasicBlock *MBB, Status Stat) { - for (auto *PredMBB : MBB->predecessors()) { - Status PredStatus = getStatus(PredMBB); - // If status of predecessor block has gone above current block - // we update current blocks status. - if (PredStatus > Stat) - Stat = PredStatus; - } - return Stat; - }; - - auto addSuccesors = [&](MachineBasicBlock *MBB) { - for (auto *SuccMBB : MBB->successors()) { - if (!SuccMBB->isEHPad()) - WorkList.insert(SuccMBB); - } - }; - - // Insert the successors of start block - // and landing pads successor. - Statuses[StartBlock] = NonEH; - addSuccesors(StartBlock); - for (auto *LP : EHBlocks) { - addSuccesors(LP); - Statuses[LP] = EH; - } - - // Worklist iterative algorithm. 
- while (!WorkList.empty()) { - auto *MBB = *WorkList.begin(); - WorkList.erase(MBB); - - Status OldStatus = getStatus(MBB); - - // Check on predecessors and check for - // Status update. - Status NewStatus = checkPredecessors(MBB, OldStatus); - - // Did the block status change? - bool changed = OldStatus != NewStatus; - if (changed) { - addSuccesors(MBB); - Statuses[MBB] = NewStatus; - } - } - - for (auto Entry : Statuses) { - if (Entry.second == EH) - Entry.first->setSectionID(MBBSectionID::ColdSectionID); +/// only by EH pad as cold. This will help mark EH pads statically cold +/// instead of relying on profile data. +static void setDescendantEHBlocksCold(MachineFunction &MF) { + DenseSet EHBlocks; + computeEHOnlyBlocks(MF, EHBlocks); + for (auto Block : EHBlocks) { + Block->setSectionID(MBBSectionID::ColdSectionID); } } @@ -213,7 +152,7 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { // Split all EH code and it's descendant statically by default. if (SplitAllEHCode) - setDescendantEHBlocksCold(LandingPads, MF); + setDescendantEHBlocksCold(MF); // We only split out eh pads if all of them are cold. 
else { bool HasHotLandingPads = false; diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp index c4844dbe7f3c..a819bd9fb6e0 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/IPO/SampleProfileProbe.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/EHUtils.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -32,7 +33,7 @@ #include using namespace llvm; -#define DEBUG_TYPE "sample-profile-probe" +#define DEBUG_TYPE "pseudo-probe" STATISTIC(ArtificialDbgLine, "Number of probes that have an artificial debug line"); @@ -253,8 +254,14 @@ void SampleProfileProber::computeCFGHash() { } void SampleProfileProber::computeProbeIdForBlocks() { + DenseSet KnownColdBlocks; + computeEHOnlyBlocks(*F, KnownColdBlocks); + // Insert pseudo probe to non-cold blocks only. This will reduce IR size as + // well as the binary size while retaining the profile quality. for (auto &BB : *F) { - BlockProbeIds[&BB] = ++LastProbeId; + ++LastProbeId; + if (!KnownColdBlocks.contains(&BB)) + BlockProbeIds[&BB] = LastProbeId; } } diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll new file mode 100644 index 000000000000..697ef44fb7ed --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll @@ -0,0 +1,43 @@ +; REQUIRES: x86_64-linux +; RUN: opt < %s -passes=pseudo-probe -function-sections -S -o - | FileCheck %s + +;; Check the generation of pseudoprobe intrinsic call for non-EH blocks only. + +declare i32 @__gxx_personality_v0(...) 
+declare i32 @llvm.eh.typeid.for(ptr) nounwind +declare ptr @__cxa_begin_catch(ptr) +declare void @__cxa_end_catch() +declare void @bar() + +@_ZTIi = external constant ptr + +define void @foo() uwtable ssp personality ptr @__gxx_personality_v0 { +entry: +; CHECK: call void @llvm.pseudoprobe + invoke void @bar() + to label %ret unwind label %lpad + +ret: +; CHECK: call void @llvm.pseudoprobe + ret void + +lpad: ; preds = %entry +; CHECK-NOT: call void @llvm.pseudoprobe + %exn = landingpad {ptr, i32} + catch ptr @_ZTIi + %eh.exc = extractvalue { ptr, i32 } %exn, 0 + %eh.selector = extractvalue { ptr, i32 } %exn, 1 + %0 = call i32 @llvm.eh.typeid.for(ptr @_ZTIi) nounwind + %1 = icmp eq i32 %eh.selector, %0 + br i1 %1, label %catch, label %eh.resume + +catch: +; CHECK-NOT: call void @llvm.pseudoprobe + %ignored = call ptr @__cxa_begin_catch(ptr %eh.exc) nounwind + call void @__cxa_end_catch() nounwind + br label %ret + +eh.resume: +; CHECK-NOT: call void @llvm.pseudoprobe + resume { ptr, i32 } %exn +} -- Gitee From 0a6d0c1894f160ae45d0b4697a1dd782b47b8a0d Mon Sep 17 00:00:00 2001 From: Lei Wang Date: Mon, 1 Apr 2024 13:54:54 -0700 Subject: [PATCH 2/2] [Backport][PseudoProbe] Extend to skip instrumenting probe into the dests of invoke (#79919) As before we only skip instrumenting probe of `unwind`(`KnownColdBlock`) block, this PR extends to skip the both EH flow from `invoke`, i.e. also skip the `normal` dest. For more contexts: when doing call-to-invoke conversion, the block is split by the `invoke` and two extra blocks(`normal` and `unwind`) are added. With this PR, the instrumentation is the same as the one before the call-to-invoke conversion. One significant benefit is this can help mitigate the "unstable IR" issue(https://discourse.llvm.org/t/ipo-for-linkonce-odr-functions/69404), the two versions now are on the same probe instrumentation, expected to be the same checksum. 
To achieve the same checksum, some tweaks are needed: - Now it also skips incrementing the probe ID for the skipped probe. - The checksum is also computed based on the CFG that skips the EH edges. We observed this fixes ~5% mismatched samples. --- llvm/include/llvm/Analysis/EHUtils.h | 1 - .../llvm/Transforms/IPO/SampleProfileProbe.h | 13 +- .../lib/Transforms/IPO/SampleProfileProbe.cpp | 122 ++++++++++++-- .../ThinLTO/X86/pseudo-probe-desc-import.ll | 4 +- .../SampleProfile/pseudo-probe-eh.ll | 2 +- .../SampleProfile/pseudo-probe-invoke.ll | 155 ++++++++++++++++++ 6 files changed, 277 insertions(+), 20 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll diff --git a/llvm/include/llvm/Analysis/EHUtils.h b/llvm/include/llvm/Analysis/EHUtils.h index 728ab53c89bc..c8319409643c 100644 --- a/llvm/include/llvm/Analysis/EHUtils.h +++ b/llvm/include/llvm/Analysis/EHUtils.h @@ -79,7 +79,6 @@ static void computeEHOnlyBlocks(FunctionT &F, DenseSet &EHBlocks) { } } - EHBlocks.clear(); for (auto Entry : Statuses) { if (Entry.second == EH) EHBlocks.insert(Entry.first); diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h index ebac3d6a24ef..aea557452460 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -107,8 +107,17 @@ private: uint64_t getFunctionHash() const { return FunctionHash; } uint32_t getBlockId(const BasicBlock *BB) const; uint32_t getCallsiteId(const Instruction *Call) const; - void computeCFGHash(); - void computeProbeIdForBlocks(); + void findUnreachableBlocks(DenseSet &BlocksToIgnore); + void findInvokeNormalDests(DenseSet &InvokeNormalDests); + void computeBlocksToIgnore(DenseSet &BlocksToIgnore, + DenseSet &BlocksAndCallsToIgnore); + void computeProbeIdForCallsites( + const DenseSet &BlocksAndCallsToIgnore); + const Instruction * + getOriginalTerminator(const BasicBlock *Head, + 
const DenseSet &BlocksToIgnore); + void computeCFGHash(const DenseSet &BlocksToIgnore); + void computeProbeIdForBlocks(const DenseSet &BlocksToIgnore); void computeProbeIdForCallsites(); Function *F; diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp index a819bd9fb6e0..7e495d1591d4 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -217,23 +217,114 @@ SampleProfileProber::SampleProfileProber(Function &Func, BlockProbeIds.clear(); CallProbeIds.clear(); LastProbeId = (uint32_t)PseudoProbeReservedId::Last; - computeProbeIdForBlocks(); - computeProbeIdForCallsites(); - computeCFGHash(); + + DenseSet BlocksToIgnore; + DenseSet BlocksAndCallsToIgnore; + computeBlocksToIgnore(BlocksToIgnore, BlocksAndCallsToIgnore); + + computeProbeIdForBlocks(BlocksToIgnore); + computeProbeIdForCallsites(BlocksAndCallsToIgnore); + computeCFGHash(BlocksToIgnore); +} + +// Two purposes to compute the blocks to ignore: +// 1. Reduce the IR size. +// 2. Make the instrumentation(checksum) stable. e.g. the frontend may +// generate unstable IR while optimizing nounwind attribute, some versions are +// optimized with the call-to-invoke conversion, while other versions do not. +// This discrepancy in probe ID could cause profile mismatching issues. +// Note that those ignored blocks are either cold blocks or new split blocks +// whose original blocks are instrumented, so it shouldn't degrade the profile +// quality. +void SampleProfileProber::computeBlocksToIgnore( + DenseSet &BlocksToIgnore, + DenseSet &BlocksAndCallsToIgnore) { + // Ignore the cold EH and unreachable blocks and calls. 
+ computeEHOnlyBlocks(*F, BlocksAndCallsToIgnore); + findUnreachableBlocks(BlocksAndCallsToIgnore); + + BlocksToIgnore.insert(BlocksAndCallsToIgnore.begin(), + BlocksAndCallsToIgnore.end()); + + // Handle the call-to-invoke conversion case: make sure that the probe id and + // callsite id are consistent before and after the block split. For block + // probe, we only keep the head block probe id and ignore the block ids of the + // normal dests. For callsite probe, it's different to block probe, there is + // no additional callsite in the normal dests, so we don't ignore the + // callsites. + findInvokeNormalDests(BlocksToIgnore); +} + +// Unreachable blocks and calls are always cold, ignore them. +void SampleProfileProber::findUnreachableBlocks( + DenseSet &BlocksToIgnore) { + for (auto &BB : *F) { + if (&BB != &F->getEntryBlock() && pred_size(&BB) == 0) + BlocksToIgnore.insert(&BB); + } +} + +// In call-to-invoke conversion, basic block can be split into multiple blocks, +// only instrument probe in the head block, ignore the normal dests. +void SampleProfileProber::findInvokeNormalDests( + DenseSet &InvokeNormalDests) { + for (auto &BB : *F) { + auto *TI = BB.getTerminator(); + if (auto *II = dyn_cast(TI)) { + auto *ND = II->getNormalDest(); + InvokeNormalDests.insert(ND); + + // The normal dest and the try/catch block are connected by an + // unconditional branch. + while (pred_size(ND) == 1) { + auto *Pred = *pred_begin(ND); + if (succ_size(Pred) == 1) { + InvokeNormalDests.insert(Pred); + ND = Pred; + } else + break; + } + } + } +} + +// The call-to-invoke conversion splits the original block into a list of block, +// we need to compute the hash using the original block's successors to keep the +// CFG Hash consistent. For a given head block, we keep searching the +// successor(normal dest or unconditional branch dest) to find the tail block, +// the tail block's successors are the original block's successors. 
+const Instruction *SampleProfileProber::getOriginalTerminator( + const BasicBlock *Head, const DenseSet &BlocksToIgnore) { + auto *TI = Head->getTerminator(); + if (auto *II = dyn_cast(TI)) { + return getOriginalTerminator(II->getNormalDest(), BlocksToIgnore); + } else if (succ_size(Head) == 1 && + BlocksToIgnore.contains(*succ_begin(Head))) { + // Go to the unconditional branch dest. + return getOriginalTerminator(*succ_begin(Head), BlocksToIgnore); + } + return TI; } // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index // value of each BB in the CFG. The higher 32 bits record the number of edges // preceded by the number of indirect calls. // This is derived from FuncPGOInstrumentation::computeCFGHash(). -void SampleProfileProber::computeCFGHash() { +void SampleProfileProber::computeCFGHash( + const DenseSet &BlocksToIgnore) { std::vector Indexes; JamCRC JC; for (auto &BB : *F) { - auto *TI = BB.getTerminator(); + if (BlocksToIgnore.contains(&BB)) + continue; + + auto *TI = getOriginalTerminator(&BB, BlocksToIgnore); for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { auto *Succ = TI->getSuccessor(I); auto Index = getBlockId(Succ); + // Ignore ignored-block(zero ID) to avoid unstable checksum. + if (Index == 0) + continue; for (int J = 0; J < 4; J++) Indexes.push_back((uint8_t)(Index >> (J * 8))); } @@ -253,20 +344,23 @@ void SampleProfileProber::computeCFGHash() { << ", Hash = " << FunctionHash << "\n"); } -void SampleProfileProber::computeProbeIdForBlocks() { - DenseSet KnownColdBlocks; - computeEHOnlyBlocks(*F, KnownColdBlocks); - // Insert pseudo probe to non-cold blocks only. This will reduce IR size as - // well as the binary size while retaining the profile quality. 
+void SampleProfileProber::computeProbeIdForBlocks( + const DenseSet &BlocksToIgnore) { for (auto &BB : *F) { - ++LastProbeId; - if (!KnownColdBlocks.contains(&BB)) - BlockProbeIds[&BB] = LastProbeId; + if (BlocksToIgnore.contains(&BB)) + continue; + BlockProbeIds[&BB] = ++LastProbeId; } } -void SampleProfileProber::computeProbeIdForCallsites() { +void SampleProfileProber::computeProbeIdForCallsites( + const DenseSet &BlocksAndCallsToIgnore) { + LLVMContext &Ctx = F->getContext(); + Module *M = F->getParent(); + for (auto &BB : *F) { + if (BlocksAndCallsToIgnore.contains(&BB)) + continue; for (auto &I : BB) { if (!isa(I)) continue; diff --git a/llvm/test/ThinLTO/X86/pseudo-probe-desc-import.ll b/llvm/test/ThinLTO/X86/pseudo-probe-desc-import.ll index 21dd8c0fe924..f915aaccc06e 100644 --- a/llvm/test/ThinLTO/X86/pseudo-probe-desc-import.ll +++ b/llvm/test/ThinLTO/X86/pseudo-probe-desc-import.ll @@ -12,8 +12,8 @@ ; RUN: llvm-lto -thinlto-action=import %t3.bc -thinlto-index=%t3.index.bc -o /dev/null 2>&1 | FileCheck %s --check-prefix=WARN -; CHECK-NOT: {i64 6699318081062747564, i64 4294967295, !"foo" -; CHECK: !{i64 -2624081020897602054, i64 281479271677951, !"main" +; CHECK-NOT: {i64 6699318081062747564, i64 [[#]], !"foo" +; CHECK: !{i64 -2624081020897602054, i64 [[#]], !"main" ; WARN: warning: Pseudo-probe ignored: source module '{{.*}}' is compiled with -fpseudo-probe-for-profiling while destination module '{{.*}}' is not diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll index 697ef44fb7ed..9954914bca43 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll @@ -18,7 +18,7 @@ entry: to label %ret unwind label %lpad ret: -; CHECK: call void @llvm.pseudoprobe +; CHECK-NOT: call void @llvm.pseudoprobe ret void lpad: ; preds = %entry diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll 
b/llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll new file mode 100644 index 000000000000..822ab403dee2 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll @@ -0,0 +1,155 @@ +; REQUIRES: x86_64-linux +; RUN: opt < %s -passes=pseudo-probe -S -o - | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__clang_call_terminate = comdat any + +@x = dso_local global i32 0, align 4, !dbg !0 + +; Function Attrs: mustprogress noinline nounwind uwtable +define dso_local void @_Z3barv() #0 personality ptr @__gxx_personality_v0 !dbg !14 { +entry: +; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 1 + %0 = load volatile i32, ptr @x, align 4, !dbg !17, !tbaa !19 + %tobool = icmp ne i32 %0, 0, !dbg !17 + br i1 %tobool, label %if.then, label %if.else, !dbg !23 + +if.then: ; preds = %entry +; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 2 + invoke void @_Z3foov() + to label %invoke.cont unwind label %terminate.lpad, !dbg !24 + +invoke.cont: ; preds = %if.then +; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844, + invoke void @_Z3bazv() + to label %invoke.cont1 unwind label %terminate.lpad, !dbg !26 + +invoke.cont1: ; preds = %invoke.cont +; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844, + br label %if.end, !dbg !27 + +if.else: ; preds = %entry +; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 3 + invoke void @_Z3foov() + to label %invoke.cont2 unwind label %terminate.lpad, !dbg !28 + +invoke.cont2: ; preds = %if.else +; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844, + br label %if.end + +if.end: ; preds = %invoke.cont2, %invoke.cont1 +; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 4 + invoke void @_Z3foov() + to label %invoke.cont3 unwind label %terminate.lpad, !dbg !29 + +invoke.cont3: ; preds = %if.end +; 
CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844, + %1 = load volatile i32, ptr @x, align 4, !dbg !30, !tbaa !19 + %tobool4 = icmp ne i32 %1, 0, !dbg !30 + br i1 %tobool4, label %if.then5, label %if.end6, !dbg !32 + +if.then5: ; preds = %invoke.cont3 +; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 5 + %2 = load volatile i32, ptr @x, align 4, !dbg !33, !tbaa !19 + %inc = add nsw i32 %2, 1, !dbg !33 + store volatile i32 %inc, ptr @x, align 4, !dbg !33, !tbaa !19 + br label %if.end6, !dbg !35 + +if.end6: ; preds = %if.then5, %invoke.cont3 +; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 6 + ret void, !dbg !36 + +terminate.lpad: ; preds = %if.end, %if.else, %invoke.cont, %if.then +; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844, + %3 = landingpad { ptr, i32 } + catch ptr null, !dbg !24 + %4 = extractvalue { ptr, i32 } %3, 0, !dbg !24 + call void @__clang_call_terminate(ptr %4) #3, !dbg !24 + unreachable, !dbg !24 +} + +; Function Attrs: mustprogress noinline nounwind uwtable +define dso_local void @_Z3foov() #0 !dbg !37 { +entry: + ret void, !dbg !38 +} + +declare i32 @__gxx_personality_v0(...) 
+ +; Function Attrs: noinline noreturn nounwind uwtable +define linkonce_odr hidden void @__clang_call_terminate(ptr noundef %0) #1 comdat { + %2 = call ptr @__cxa_begin_catch(ptr %0) #4 + call void @_ZSt9terminatev() #3 + unreachable +} + +declare ptr @__cxa_begin_catch(ptr) + +declare void @_ZSt9terminatev() + +; Function Attrs: mustprogress noinline nounwind uwtable +define dso_local void @_Z3bazv() #0 !dbg !39 { +entry: + ret void, !dbg !40 +} + +; CHECK: ![[#]] = !{i64 -3270123626113159616, i64 4294967295, !"_Z3bazv"} + +attributes #0 = { mustprogress noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { noinline noreturn nounwind uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #2 = { mustprogress noinline norecurse nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #3 = { noreturn nounwind } +attributes #4 = { nounwind } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9, !10, !11, !12} +!llvm.ident = !{!13} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "test.cpp", directory: "/home", checksumkind: CSK_MD5, checksum: "a4c7b0392f3fd9c8ebb85065159dbb02") +!4 = !{!0} +!5 = 
!DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6) +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 7, !"Dwarf Version", i32 5} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{i32 8, !"PIC Level", i32 2} +!11 = !{i32 7, !"PIE Level", i32 2} +!12 = !{i32 7, !"uwtable", i32 2} +!13 = !{!"clang version 19.0.0"} +!14 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !3, file: !3, line: 4, type: !15, scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!15 = !DISubroutineType(types: !16) +!16 = !{null} +!17 = !DILocation(line: 5, column: 6, scope: !18) +!18 = distinct !DILexicalBlock(scope: !14, file: !3, line: 5, column: 6) +!19 = !{!20, !20, i64 0} +!20 = !{!"int", !21, i64 0} +!21 = !{!"omnipotent char", !22, i64 0} +!22 = !{!"Simple C++ TBAA"} +!23 = !DILocation(line: 5, column: 6, scope: !14) +!24 = !DILocation(line: 6, column: 5, scope: !25) +!25 = distinct !DILexicalBlock(scope: !18, file: !3, line: 5, column: 9) +!26 = !DILocation(line: 7, column: 5, scope: !25) +!27 = !DILocation(line: 8, column: 3, scope: !25) +!28 = !DILocation(line: 9, column: 5, scope: !18) +!29 = !DILocation(line: 11, column: 3, scope: !14) +!30 = !DILocation(line: 12, column: 6, scope: !31) +!31 = distinct !DILexicalBlock(scope: !14, file: !3, line: 12, column: 6) +!32 = !DILocation(line: 12, column: 6, scope: !14) +!33 = !DILocation(line: 13, column: 5, scope: !34) +!34 = distinct !DILexicalBlock(scope: !31, file: !3, line: 12, column: 9) +!35 = !DILocation(line: 14, column: 5, scope: !34) +!36 = !DILocation(line: 17, column: 1, scope: !14) +!37 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !3, file: !3, line: 19, type: !15, scopeLine: 19, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!38 = !DILocation(line: 19, column: 13, scope: !37) 
+!39 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !3, file: !3, line: 18, type: !15, scopeLine: 18, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!40 = !DILocation(line: 18, column: 13, scope: !39) +!41 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 22, type: !42, scopeLine: 22, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!42 = !DISubroutineType(types: !43) +!43 = !{!6} +!44 = !DILocation(line: 23, column: 3, scope: !41) +!45 = !DILocation(line: 24, column: 1, scope: !41) -- Gitee