From 556f0c992c5590e254426a829de9caba15116215 Mon Sep 17 00:00:00 2001 From: yuqiuchu Date: Tue, 13 Aug 2024 00:14:44 +0800 Subject: [PATCH 1/6] chore: initial build of stackanalyzer as an LLVM tool. --- llvm/test/CMakeLists.txt | 1 + llvm/tools/stackanalyzer/CMakeLists.txt | 11 ++ llvm/tools/stackanalyzer/stackanalyzer.cpp | 135 ++++++++++++++++++ llvm/utils/gn/secondary/llvm/test/BUILD.gn | 1 + .../llvm/tools/stackanalyzer/BUILD.gn | 10 ++ .../llvm-project-overlay/llvm/BUILD.bazel | 17 +++ 6 files changed, 175 insertions(+) create mode 100644 llvm/tools/stackanalyzer/CMakeLists.txt create mode 100644 llvm/tools/stackanalyzer/stackanalyzer.cpp create mode 100644 llvm/utils/gn/secondary/llvm/tools/stackanalyzer/BUILD.gn diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt index 8cd77b72c987..71e0657fb065 100644 --- a/llvm/test/CMakeLists.txt +++ b/llvm/test/CMakeLists.txt @@ -142,6 +142,7 @@ set(LLVM_TEST_DEPENDS sancov sanstats split-file + stackanalyzer verify-uselistorder yaml-bench yaml2obj diff --git a/llvm/tools/stackanalyzer/CMakeLists.txt b/llvm/tools/stackanalyzer/CMakeLists.txt new file mode 100644 index 000000000000..f0309a18d0dc --- /dev/null +++ b/llvm/tools/stackanalyzer/CMakeLists.txt @@ -0,0 +1,11 @@ +set(LLVM_LINK_COMPONENTS + Analysis + BitReader + Core + Passes + Support +) + +add_llvm_tool(stackanalyzer + stackanalyzer.cpp +) \ No newline at end of file diff --git a/llvm/tools/stackanalyzer/stackanalyzer.cpp b/llvm/tools/stackanalyzer/stackanalyzer.cpp new file mode 100644 index 000000000000..add2272e8bfa --- /dev/null +++ b/llvm/tools/stackanalyzer/stackanalyzer.cpp @@ -0,0 +1,135 @@ +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Passes/PassPlugin.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include
"llvm/Support/FileSystem.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/MemoryBuffer.h" +#include +#include + +using namespace llvm; + +static cl::OptionCategory StackAnalyzerCategory("StackAnalyzerCategory"); +static cl::opt + InputFilename(cl::Positional, cl::desc("Input .bc file to be analyzed"), + cl::cat(StackAnalyzerCategory)); +static cl::opt + UseCallGraph("callgraph", + cl::desc("Output the callgraph given the .bc file"), + cl::cat(StackAnalyzerCategory)); +static cl::opt + UseAnalysis("analysis", + cl::desc("Output possible path of the callgraph which can " + "possibly cause stack overflow"), + cl::cat(StackAnalyzerCategory)); +static cl::opt + LimitSize("stacksize", + cl::desc("max stack size of the limit of a path within the " + "callgraph, given the .bc " + "file"), + cl::init(1024), cl::cat(StackAnalyzerCategory)); +static cl::opt UseAnders( + "anders", + cl::desc("Use anders analysis to analyze the call graph of the .bc file"), + cl::init(false), cl::cat(StackAnalyzerCategory)); +static cl::opt + UseDebug("debug", + cl::desc("Enable debug output for the call graph analysis"), + cl::cat(StackAnalyzerCategory)); +static cl::opt InputSUFilename( + "sufile", cl::desc("Input .su file to be analyzed, given the .bc file"), + cl::init("/tmp/output.su"), cl::cat(StackAnalyzerCategory)); +static cl::opt OutputFilename( + "o", + cl::desc("Output callgraph in .dot format with stack cost information"), + cl::init("./callgraph.dot"), cl::cat(StackAnalyzerCategory)); + +static Expected> openBitcodeFile(StringRef Path) { + Expected> MemBufOrErr = + errorOrToExpected(MemoryBuffer::getFileOrSTDIN(Path)); + if (Error E = MemBufOrErr.takeError()) + return E; + + std::unique_ptr MemBuf = std::move(*MemBufOrErr); + + return MemBuf; +} + +int main(int argc, char **argv) { + InitLLVM X(argc, argv); + cl::HideUnrelatedOptions(StackAnalyzerCategory); + cl::ParseCommandLineOptions(argc, argv); + ExitOnError ExitOnErr("stackanalyzer: "); + + LLVMContext Context; 
+ auto MB = ExitOnErr(openBitcodeFile(InputFilename)); + auto M = ExitOnErr(parseBitcodeFile(MB->getMemBufferRef(), Context)); + + // auto Config = PointerAnalysisCLIConfig{UseAnders, UseDebug}; + + ModuleAnalysisManager MAM; + PassBuilder PB; + PB.registerModuleAnalyses(MAM); + MAM.registerPass([/*Config*/] { return CallGraphAnalysis(); }); + ModulePassManager MPM; + MPM.addPass(RequireAnalysisPass()); + MPM.run(*M, MAM); + + MapVector StackSize; + for (auto &F : *M) { + StackSize.insert(std::make_pair(&F, 0)); + } + // writeModuleToFile(M.get()); + // parseStackSizeFromSU(InputSUFilename, *M, StackSize); + + const auto &Graph = MAM.getResult(*M); + + if (UseCallGraph) { + std::error_code EC; + raw_fd_ostream File("./test/callgraph/callgraph.dot", EC, sys::fs::OF_Text); + + if (!EC) { + File << "digraph \"CallGraph\" {\n"; + for (auto &NodePair : Graph) { + CallGraphNode *Node = NodePair.second.get(); + if (Function *F = Node->getFunction()) { + File << " \"" << F->getName() << "\";\n"; + } + } + for (auto &NodePair : Graph) { + CallGraphNode *Node = NodePair.second.get(); + if (Function *F = Node->getFunction()) { + File << " \"" << F->getName() << "\" [label=\"" << F->getName() + << "\\nStack Size: " << StackSize[F] << " bytes\"];\n"; + } + } + for (auto &NodePair : Graph) { + CallGraphNode *Node = NodePair.second.get(); + if (Function *F = Node->getFunction()) { + for (auto &CallRecord : *Node) { + if (Function *Callee = CallRecord.second->getFunction()) { + File << " \"" << F->getName() << "\" -> \"" << Callee->getName() + << "\";\n"; + } + } + } + } + + File << "}\n"; + } + } + if (UseAnalysis) { + + // StackOverflowDetector Detector{LimitSize}; + // Detector.analyze(Graph, StackSize); + // Detector.printResults(outs()); + } + + return 0; +} \ No newline at end of file diff --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn index 2f46527b613a..5da4fdc912d5 100644 --- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn +++ 
b/llvm/utils/gn/secondary/llvm/test/BUILD.gn @@ -316,6 +316,7 @@ group("test") { "//llvm/tools/opt", "//llvm/tools/sancov", "//llvm/tools/sanstats", + "//llvm/tools/stackanalyzer", "//llvm/tools/verify-uselistorder", "//llvm/tools/yaml2obj", "//llvm/unittests", diff --git a/llvm/utils/gn/secondary/llvm/tools/stackanalyzer/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/stackanalyzer/BUILD.gn new file mode 100644 index 000000000000..373b12c8df1c --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/tools/stackanalyzer/BUILD.gn @@ -0,0 +1,10 @@ +executable("stackanalyzer") { + deps = [ + "//llvm/lib/Analysis", + "//llvm/lib/Core", + "//llvm/lib/Passes", + "//llvm/lib/Bitcode/Reader", + "//llvm/lib/Support", + ] + sources = [ "stackanalyzer.cpp" ] +} diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index a7e9398ea8fd..85d993d84a30 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -4710,6 +4710,23 @@ cc_binary( ], ) +cc_binary( + name = "stackanalyzer", + srcs = glob([ + "tools/stackanalyzer/*.cpp", + "tools/stackanalyzer/*.h", + ]), + copts = llvm_copts, + stamp = 0, + deps = [ + ":Analysis", + ":BitcodeReader", + ":Core", + ":Passes", + ":Support", + ], +) + cc_binary( name = "split-file", srcs = glob([ -- Gitee From 5a9ac801f0e89025d109acb8da811fcc35e684ee Mon Sep 17 00:00:00 2001 From: yuqiuchu Date: Wed, 14 Aug 2024 00:11:27 +0800 Subject: [PATCH 2/6] chore: migrate code from the llvm-17 docker environment.
--- llvm/tools/stackanalyzer/CMakeLists.txt | 2 + llvm/tools/stackanalyzer/CallGraphGen.cpp | 431 ++++++++++++++ llvm/tools/stackanalyzer/CallGraphGen.h | 558 ++++++++++++++++++ llvm/tools/stackanalyzer/StackUsage.cpp | 134 +++++ llvm/tools/stackanalyzer/StackUsage.h | 70 +++ llvm/tools/stackanalyzer/stackanalyzer.cpp | 21 +- .../llvm/tools/stackanalyzer/BUILD.gn | 6 +- 7 files changed, 1211 insertions(+), 11 deletions(-) create mode 100644 llvm/tools/stackanalyzer/CallGraphGen.cpp create mode 100644 llvm/tools/stackanalyzer/CallGraphGen.h create mode 100644 llvm/tools/stackanalyzer/StackUsage.cpp create mode 100644 llvm/tools/stackanalyzer/StackUsage.h diff --git a/llvm/tools/stackanalyzer/CMakeLists.txt b/llvm/tools/stackanalyzer/CMakeLists.txt index f0309a18d0dc..f1d67e8e09e9 100644 --- a/llvm/tools/stackanalyzer/CMakeLists.txt +++ b/llvm/tools/stackanalyzer/CMakeLists.txt @@ -7,5 +7,7 @@ set(LLVM_LINK_COMPONENTS ) add_llvm_tool(stackanalyzer + CallGraphGen.cpp stackanalyzer.cpp + StackUsage.cpp ) \ No newline at end of file diff --git a/llvm/tools/stackanalyzer/CallGraphGen.cpp b/llvm/tools/stackanalyzer/CallGraphGen.cpp new file mode 100644 index 000000000000..4a2a72b83f18 --- /dev/null +++ b/llvm/tools/stackanalyzer/CallGraphGen.cpp @@ -0,0 +1,431 @@ +//===--- CallGraphGen.cpp - Analyze the callgraph of a LLVM bitcode file using +// pointer analysis ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CallGraphGen.h" + +using namespace llvm; + +unsigned ConstraintGraphNode::NodeIndex = 0; + +std::unique_ptr +ConstraintGraph::createInitialConstraintNode() { + auto *Int32Type = llvm::Type::getInt32Ty(M.getContext()); + auto *Int32PtrType = llvm::PointerType::get(Int32Type, 0); + auto *NullInt32Ptr = llvm::ConstantPointerNull::get(Int32PtrType); + return std::make_unique((NullInt32Ptr), this); +} + +ConstraintGraph::ConstraintGraph(Module &Module) : M(Module) { + InitialConstraintNode = createInitialConstraintNode().get(); + for (auto &F : M) { + if (F.hasExternalLinkage()) + continue; + if (F.isIntrinsic()) + continue; + } +} + +ConstraintGraph::~ConstraintGraph() { + // [todo][implementation] memory management +} + +ConstraintGraphNode *ConstraintGraph::getOrInsertConstraintNode(Value *V) { + auto &CSN = ConstraintGraphNodes[V]; + if (CSN) { + return CSN.get(); + } + CSN = std::make_unique(V, this); + return CSN.get(); +} + +Constraint *ConstraintGraph::getOrInsertConstraint(Value *Dst, Value *Src, + ConstraintKind Kind) { + for (auto &C : Constraints) { + if (C->Src->V == Src && C->Dst->V == Dst && C->Kind == Kind) { + return C; + } + } + auto *SrcNode = getOrInsertConstraintNode(Src); + auto *DstNode = getOrInsertConstraintNode(Dst); + auto *ConstraitEdge = new Constraint(DstNode, SrcNode, Kind); + SrcNode->addConstraint(DstNode, Kind); + Constraints.push_back(ConstraitEdge); + return ConstraitEdge; +} + +AnalysisKey PACallGraphAnalysis::Key; + +PACallGraphAnalysis::Result PACallGraphAnalysis::run(Module &M, + ModuleAnalysisManager &) { + auto FI = M.rbegin(), FE = M.rend(); + for (; (FI->hasExternalLinkage() || FI->isIntrinsic()) && FI != FE; ++FI) { + // start from main for now + if (FI->getName() == "main") { + break; + } + } + DataflowResult::Type ResultFact; + PAAnalysisDataflowFacts InitFact; + 
PointerAnalysisVisitor PAVisitor(M); + if (Config.UseAnders) { + compForwardDataflow(&(*FI), &PAVisitor, &ResultFact, InitFact); + PAVisitor.solveConstraint(); + } + if (Config.UseDebug) { + PAVisitor.printConstraintGraph(outs()); + PAVisitor.printPointToSetMap(outs()); + } + return std::move(PAVisitor.CG); +} + +// for debugging purpose, reference: +// https://github.com/SunnyWadkar/LLVM-DataFlow-Analysis/blob/master/Dataflow/available-support.cpp +static std::string getShortValueName(const Value *V) { + if (auto *Arg = dyn_cast(V)) { + return Arg->getName().str() + ":" + Arg->getParent()->getName().str(); + } + if (auto *InstV = dyn_cast(V)) { + std::string S = ""; + raw_string_ostream *Strm = new raw_string_ostream(S); + V->print(*Strm); + std::string Inst = Strm->str(); + size_t Idx1 = Inst.find("%"); + size_t Idx2 = Inst.find(" ", Idx1); + if (Idx1 != std::string::npos && Idx2 != std::string::npos) { + return Inst.substr(Idx1, Idx2 - Idx1) + ":" + + InstV->getFunction()->getName().str(); + } + return "\"" + Inst + "\""; + } + if (const ConstantInt *Cint = dyn_cast(V)) { + std::string S = ""; + raw_string_ostream *Strm = new raw_string_ostream(S); + Cint->getValue().print(*Strm, true); + return Strm->str(); + } + if (V->getName().str().length() > 0) { + return V->getName().str(); + } + std::string S = ""; + raw_string_ostream *Strm = new raw_string_ostream(S); + V->print(*Strm); + std::string Inst = Strm->str(); + return "\"" + Inst + "\""; +} + +PointerAnalysisVisitor::PointerAnalysisVisitor(Module &InitModule) + : CSG(ConstraintGraph(InitModule)), CG(InitModule) { + for (auto &F : InitModule) { + if (F.hasExternalLinkage()) + continue; + if (F.isIntrinsic()) + continue; + } +} + +void PointerAnalysisVisitor::merge(PAAnalysisDataflowFacts *Facts, + const PAAnalysisDataflowFacts &OtherFacts) { + Facts->insert(Facts->end(), OtherFacts.begin(), OtherFacts.end()); + std::sort(Facts->begin(), Facts->end()); + auto Last = std::unique(Facts->begin(), Facts->end()); + 
Facts->erase(Last, Facts->end()); +} + +void PointerAnalysisVisitor::compDFVal(Instruction *Inst, + PAAnalysisDataflowFacts *Dfval) { + switch (Inst->getOpcode()) { + case Instruction::Load: { + transfer(dyn_cast(Inst), Dfval); + break; + } + case Instruction::Store: { + transfer(dyn_cast(Inst), Dfval); + break; + } + case Instruction::Call: { + transfer(dyn_cast(Inst), Dfval); + break; + } + } +} + +void PointerAnalysisVisitor::transfer(LoadInst *Inst, + PAAnalysisDataflowFacts *Dfval) { + auto *Addr = Inst->getPointerOperand(); + auto *Constraint = + CSG.getOrInsertConstraint(Inst, Addr, ConstraintKind::Load); + Dfval->push_back(Constraint); + Constraints.push_back(Constraint); +} + +void PointerAnalysisVisitor::transfer(StoreInst *Inst, + PAAnalysisDataflowFacts *Dfval) { + auto *Addr = Inst->getPointerOperand(); + auto *Val = Inst->getValueOperand(); + Constraint *Cstrt; + if (isa(Val)) { + Cstrt = CSG.getOrInsertConstraint(Addr, Val, ConstraintKind::GetAddr); + } else { + Cstrt = CSG.getOrInsertConstraint(Addr, Val, ConstraintKind::Store); + } + Dfval->push_back(Cstrt); + Constraints.push_back(Cstrt); +} + +void PointerAnalysisVisitor::transfer(CallInst *Inst, + PAAnalysisDataflowFacts *Dfval) { + auto *Callee = Inst->getCalledFunction(); + auto *PrevFunction = CurrentFunction; + if (!Callee) { + auto *CalleeValue = Inst->getCalledOperand(); + auto *Constraint = + CSG.getOrInsertConstraint(Inst, CalleeValue, ConstraintKind::Unsolved); + for (unsigned I = 0, NumOperands = Inst->arg_size(); I != NumOperands; + ++I) { + auto *RArg = Inst->getArgOperand(I); + auto *CSGN = CSG.getOrInsertConstraintNode(RArg); + UnresolvedArgs[Constraint->Src].push_back(CSGN); + } + Dfval->push_back(Constraint); + Constraints.push_back(Constraint); + ConstraintFunctionMap[Constraint] = CurrentFunction; + } else { + if (Callee->isIntrinsic() || Callee->isDeclaration()) + return; + for (unsigned I = 0, NumOperands = Inst->arg_size(); I != NumOperands; + ++I) { + auto *RArg = 
Inst->getArgOperand(I); + auto *FArg = Callee->getArg(I); + auto *Constraint = + CSG.getOrInsertConstraint(FArg, RArg, ConstraintKind::Copy); + Dfval->push_back(Constraint); + Constraints.push_back(Constraint); + } + DataflowResult::Type SubroutineResult; + PAAnalysisDataflowFacts SubroutineInitFact; + compForwardDataflow(Callee, this, &SubroutineResult, SubroutineInitFact); + CurrentFunction = PrevFunction; + } +} + +using ConstraintSolverFn = + void (PointerAnalysisVisitor::*)(const Constraint *Cstrt); + +static ConstraintSolverFn ConstraintSolvers[] = { + &PointerAnalysisVisitor::solveCopyConstraint, + &PointerAnalysisVisitor::solveGetAddrConstraint, + &PointerAnalysisVisitor::solveLoadConstraint, + &PointerAnalysisVisitor::solveStoreConstraint, + &PointerAnalysisVisitor::solveUnsolvedConstraint, + nullptr // ConstraintKind::Init +}; + +void PointerAnalysisVisitor::solveConstraint() { + // [todo][optimize] optimize this worklist algorithm using incremental + // point-to set information + CollectedConstraints = + std::set(Constraints.begin(), Constraints.end()); + while (!Constraints.empty()) { + auto *Constraint = Constraints.front(); + Constraints.pop_front(); + auto FormerPointToSet = PointToSetMap; + (this->*ConstraintSolvers[static_cast(Constraint->Kind)])( + Constraint); + if ((PointToSetMap[Constraint->Src] != FormerPointToSet[Constraint->Src]) || + (PointToSetMap[Constraint->Dst] != FormerPointToSet[Constraint->Dst])) { + Constraints.push_back(Constraint); + } + CollectedConstraints.insert(Constraint); + } +} + +void PointerAnalysisVisitor::propagate(const ConstraintGraphNode *Src) { + // [todo][optimize] handle cycles in propagation + for (auto Constraint : Src->getConstraintSuccs()) { + auto *Dst = Constraint.first; + auto Kind = Constraint.second; + auto Solver = ConstraintSolvers[static_cast(Kind)]; + (this->*Solver)(CSG.getOrInsertConstraint(Dst->V, Src->V, Kind)); + } +} + +void PointerAnalysisVisitor::solveLoadConstraint(const Constraint *Cstrt) { 
+ if (PointToSetMap[Cstrt->Src].empty()) { + PointToSetMap[Cstrt->Src].insert(Cstrt->Dst->V); + return; + } + for (auto *PointToValue : PointToSetMap[Cstrt->Src]) { + auto *CSGN = CSG.getOrInsertConstraintNode(PointToValue); + PointToSetMap[Cstrt->Dst].insert(PointToSetMap[CSGN].begin(), + PointToSetMap[CSGN].end()); + } + propagate(Cstrt->Dst); +} + +void PointerAnalysisVisitor::solveStoreConstraint(const Constraint *Cstrt) { + if (PointToSetMap[Cstrt->Dst].empty()) { + PointToSetMap[Cstrt->Dst].insert(Cstrt->Src->V); + return; + } + for (auto *PointToValue : PointToSetMap[Cstrt->Dst]) { + auto *Node = CSG.getOrInsertConstraintNode(PointToValue); + PointToSetMap[Node].insert(PointToSetMap[Cstrt->Src].begin(), + PointToSetMap[Cstrt->Src].end()); + if (Node != Cstrt->Src) + propagate(Node); + } +} + +void PointerAnalysisVisitor::solveGetAddrConstraint(const Constraint *Cstrt) { + PointToSetMap[Cstrt->Src].insert(Cstrt->Src->V); + PointToSetMap[Cstrt->Dst].insert(Cstrt->Src->V); + propagate(Cstrt->Dst); +} + +void PointerAnalysisVisitor::solveCopyConstraint(const Constraint *Cstrt) { + PointToSetMap[Cstrt->Dst].insert(PointToSetMap[Cstrt->Src].begin(), + PointToSetMap[Cstrt->Src].end()); + propagate(Cstrt->Dst); +} + +void PointerAnalysisVisitor::solveUnsolvedConstraint(const Constraint *Cstrt) { + auto *Call = dyn_cast(Cstrt->Dst->V); + assert(Call && "Dst should be a CallInst in a Unresolved constraint"); + for (auto *PointToValue : PointToSetMap[Cstrt->Src]) { + if (auto *Callee = dyn_cast(PointToValue)) { + DataflowResult::Type SubroutineResult; + PAAnalysisDataflowFacts SubroutineInitFact; + compForwardDataflow(Callee, this, &SubroutineResult, SubroutineInitFact); + PointToSetMap[Cstrt->Dst].insert( + FunctionReturnValuePointToSetMap[Callee].begin(), + FunctionReturnValuePointToSetMap[Callee].end()); + for (unsigned I = 0, NumOperands = static_cast( + UnresolvedArgs[Cstrt->Src].size()); + I != NumOperands; ++I) { + auto *RArg = UnresolvedArgs[Cstrt->Src][I]->V; 
+ auto *FArg = Callee->getArg(I); + auto *Constraint = + CSG.getOrInsertConstraint(FArg, RArg, ConstraintKind::Copy); + Constraints.push_back(Constraint); + } + auto *CallGraphNode = CG[ConstraintFunctionMap[Cstrt]]; + bool Extend = false; + for (auto CallNode : *CallGraphNode) { + if (CallNode.second->getFunction() == Callee) + Extend = true; + } + if (!Extend) { + CallGraphNode->addCalledFunction(Call, CG.getOrInsertFunction(Callee)); + } + } + } +} + +std::array(ConstraintKind::Init)> + ConstraintKindToString = {"Copy", "GetAddr", "Load", "Store", "Unresolved"}; + +/** + * @brief Prints the constraint graph. + * + * This function prints the constraint graph in the DOT format. The graph + * represents the constraints between different values in the analysis. The + * constraints are grouped by function name and printed accordingly. If a value + * has function information, it is grouped under the respective function name. + * If a value does not have function information, it is grouped under + * "CrossFunction". + * + * @param OS The output stream to which the graph will be printed. 
+ */ +void PointerAnalysisVisitor::printConstraintGraph(raw_ostream &OS) { + OS << "digraph \"Constraint Graph\" {\n"; + + // Map to group constraints by function name + std::map> FunctionConstraints; + + for (const auto &Constraint : CollectedConstraints) { + std::string SrcName = getShortValueName(Constraint->Src->V); + std::string DstName = getShortValueName(Constraint->Dst->V); + + // Extract function names from source and destination + size_t SrcFunctionIdx = SrcName.find(":"); + size_t DstFunctionIdx = DstName.find(":"); + + std::string FunctionName; + + if (SrcFunctionIdx != std::string::npos) { + FunctionName = SrcName.substr(SrcFunctionIdx + 1); + } else if (DstFunctionIdx != std::string::npos) { + FunctionName = DstName.substr(DstFunctionIdx + 1); + } + + std::string ConstraintStr = + " \"" + SrcName + "\" -> \"" + DstName + "\" [label=\"" + + ConstraintKindToString[static_cast(Constraint->Kind)] + + "\"];\n"; + + if (SrcFunctionIdx != std::string::npos && + DstFunctionIdx != std::string::npos && + SrcName.substr(SrcFunctionIdx + 1) == + DstName.substr(DstFunctionIdx + 1)) { + // Both have function information and it's the same + FunctionConstraints[FunctionName].push_back(ConstraintStr); + } else if (!FunctionName.empty()) { + // At least one has function information, so group by that + FunctionConstraints[FunctionName].push_back(ConstraintStr); + } else { + // If neither has function information (should not happen), group under + // "CrossFunction" + FunctionConstraints["CrossFunction"].push_back(ConstraintStr); + } + } + + // Print grouped constraints by function + for (const auto &Entry : FunctionConstraints) { + OS << "// Function: " << Entry.first << "\n"; + for (const auto &Cstrt : Entry.second) { + OS << Cstrt; + } + } + + OS << "}\n"; +} + +void PointerAnalysisVisitor::printPointToSetMap(raw_ostream &OS) { + // Map to group PointToSet entries by function name + std::map>>> + FunctionPointSetMap; + + for (const auto &Map : PointToSetMap) { + 
std::string VarName = getShortValueName(Map.first->V); + + // Extract function name from the variable name + size_t FunctionIdx = VarName.find(":"); + if (FunctionIdx != std::string::npos) { + std::string FunctionName = VarName.substr(FunctionIdx + 1); + + // Store the variable and its PointToSet values in the corresponding + // function's group + FunctionPointSetMap[FunctionName].emplace_back( + VarName, std::vector(Map.second.begin(), Map.second.end())); + } + } + + // Print grouped PointToSet entries by function + for (const auto &Entry : FunctionPointSetMap) { + OS << "// Function: " << Entry.first << "\n"; + for (const auto &VarAndPointToSet : Entry.second) { + OS << "PointToSet for " << VarAndPointToSet.first << ":"; + for (const auto &PointToValue : VarAndPointToSet.second) { + OS << " " << getShortValueName(PointToValue) << ""; + } + OS << "\n"; + } + } +} \ No newline at end of file diff --git a/llvm/tools/stackanalyzer/CallGraphGen.h b/llvm/tools/stackanalyzer/CallGraphGen.h new file mode 100644 index 000000000000..1583a3f6f023 --- /dev/null +++ b/llvm/tools/stackanalyzer/CallGraphGen.h @@ -0,0 +1,558 @@ +//===--- CallGraphGen.h - Analyze the callgraph of a LLVM bitcode file using +// pointer analysis ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_CALLGRAPHGEN_H +#define LLVM_TOOLS_CALLGRAPHGEN_H + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + +namespace llvm { + +/// Base dataflow visitor class, defines the dataflow function +template class DataflowVisitor { +public: + virtual ~DataflowVisitor() {} + + Function *CurrentFunction; + /** + * @brief Dataflow Function invoked for each basic block. + * + * @param Block The Basic Block. + * @param Dfval The input dataflow value. + * @param Forward True to compute dfval forward, otherwise backward. + */ + void compDFVal(BasicBlock *Block, T *Dfval, bool Forward) { + if (Forward == true) { + for (BasicBlock::iterator II = Block->begin(), IE = Block->end(); + II != IE; ++II) { + Instruction *Inst = &*II; + compDFVal(Inst, Dfval); + } + } else { + for (BasicBlock::reverse_iterator II = Block->rbegin(), + IE = Block->rend(); + II != IE; ++II) { + Instruction *Inst = &*II; + compDFVal(Inst, Dfval); + } + } + } + + /** + * @brief Dataflow Function invoked for each instruction. + * + * @param Inst The Instruction. + * @param Dfval The input dataflow value. + * @return True if dfval changed. + */ + virtual void compDFVal(Instruction *Inst, T *Dfval) = 0; + + /** + * @brief Merge of two dfvals, dest will be the merged result. + * + * @param Dest The destination dataflow value. + * @param Src The source dataflow value. + * @return True if dest changed. + */ + virtual void merge(T *Dest, const T &Src) = 0; +}; + +/** + * @brief Dummy class to provide a typedef for the detailed result set. + * For each basicblock, we compute its input dataflow val and its output + * dataflow val. 
+ */ +template struct DataflowResult { + typedef typename std::map> Type; +}; + +/** + * @brief Compute a forward iterated fixedpoint dataflow function, using a + * user-supplied visitor function. + * + * Note that the caller must ensure that the function is in fact a monotone + * function, as otherwise the fixedpoint may not terminate. + * + * @param Fn The function. + * @param Visitor A function to compute dataflow vals. + * @param Result The results of the dataflow. + * @param Initval The initial dataflow value. + */ +template +void compForwardDataflow(Function *Fn, DataflowVisitor *Visitor, + typename DataflowResult::Type *Result, + const T &Initval) { + Visitor->CurrentFunction = Fn; + std::set Worklist; + + // Initialize the worklist with all exit blocks + for (Function::iterator BI = Fn->begin(); BI != Fn->end(); ++BI) { + BasicBlock *BB = &*BI; + Result->insert(std::make_pair(BB, std::make_pair(Initval, Initval))); + Worklist.insert(BB); + } + + // Iteratively compute the dataflow result + while (!Worklist.empty()) { + BasicBlock *BB = *Worklist.begin(); + Worklist.erase(Worklist.begin()); + + // Merge all incoming value + T BBEnterval = (*Result)[BB].first; + for (auto PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + BasicBlock *Pred = *PI; + Visitor->merge(&BBEnterval, (*Result)[Pred].second); + } + + (*Result)[BB].first = BBEnterval; + + Visitor->compDFVal(BB, &BBEnterval, true); + + // If outgoing value changed, propagate it along the CFG + if (BBEnterval == (*Result)[BB].second) + continue; + (*Result)[BB].second = BBEnterval; + + for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { + Worklist.insert(*SI); + } + } +} + +/** + * @brief Compute a backward iterated fixedpoint dataflow function, using a + * user-supplied visitor function. + * + * Note that the caller must ensure that the function is in fact a monotone + * function, as otherwise the fixedpoint may not terminate. + * + * @param Fn The function. 
+ * @param Visitor A function to compute dataflow vals. + * @param Result The results of the dataflow. + * @param Initval The initial dataflow value. + */ +template +void compBackwardDataflow(Function *Fn, DataflowVisitor *Visitor, + typename DataflowResult::Type *Result, + const T &Initval) { + Visitor->CurrentFunction = Fn; + std::set Worklist; + + // Initialize the worklist with all entry blocks + for (Function::iterator BI = Fn->begin(); BI != Fn->end(); ++BI) { + BasicBlock *BB = &*BI; + Result->insert(std::make_pair(BB, std::make_pair(Initval, Initval))); + Worklist.insert(BB); + } + + // Iteratively compute the dataflow result + while (!Worklist.empty()) { + BasicBlock *BB = *Worklist.begin(); + Worklist.erase(Worklist.begin()); + + // Merge all outgoing value + T BBExitval = (*Result)[BB].second; + for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { + BasicBlock *Succ = *SI; + Visitor->merge(&BBExitval, (*Result)[Succ].first); + } + + (*Result)[BB].second = BBExitval; + + Visitor->compDFVal(BB, &BBExitval, false); + + // If incoming value changed, propagate it along the CFG + if (BBExitval == (*Result)[BB].first) + continue; + (*Result)[BB].first = BBExitval; + + for (auto PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + Worklist.insert(*PI); + } + } +} + +/** + * @brief Enum representing different kinds of constraints. + */ +enum class ConstraintKind { + Copy = 0, + GetAddr, + Load, + Store, + Unsolved, + Init, +}; + +class ConstraintGraph; + +/** + * @brief Class representing a node in the constraint graph. + */ +class ConstraintGraphNode { +public: + static unsigned NodeIndex; + using ConstraintRecord = std::pair; + + /** + * @brief Constructor for ConstraintGraphNode. + * + * @param VI The value associated with the node. + * @param CSG The constraint graph. 
+ */ + ConstraintGraphNode(Value *VI, ConstraintGraph *CSG) + : CSG(CSG), V(VI), Index(++NodeIndex) {} + ConstraintGraphNode(const ConstraintGraphNode &) = delete; + ConstraintGraphNode &operator=(const ConstraintGraphNode &) = delete; + + /** + * @brief Destructor for ConstraintGraphNode. + */ + ~ConstraintGraphNode() { + for (auto Succ : ConstraintedSuccs) { + Succ.first->dropRef(); + } + } + + using iterator = std::vector::iterator; + using const_iterator = std::vector::const_iterator; + + inline iterator begin() { return ConstraintedSuccs.begin(); } + inline iterator end() { return ConstraintedSuccs.end(); } + inline const_iterator begin() const { return ConstraintedSuccs.begin(); } + inline const_iterator end() const { return ConstraintedSuccs.end(); } + inline bool empty() const { return ConstraintedSuccs.empty(); } + inline unsigned size() const { + return static_cast(ConstraintedSuccs.size()); + } + + void addRef() { ++NumReferences; } + void dropRef() { --NumReferences; } + void allReferencesDropped() { NumReferences = 0; } + + ConstraintGraphNode *operator[](unsigned Idx) const { + return ConstraintedSuccs[Idx].first; + } + + /** + * @brief Add a constraint to the node. + * + * @param Succ The successor node. + * @param Kind The kind of constraint. + */ + void addConstraint(ConstraintGraphNode *Succ, ConstraintKind Kind) { + ConstraintedSuccs.push_back(std::make_pair(Succ, Kind)); + Succ->addRef(); + } + + /** + * @brief Get the successor constraints of the node. + * + * @return A vector of successor constraints. + */ + std::vector getConstraintSuccs() const { + return ConstraintedSuccs; + } + +private: + friend class ConstraintGraph; + friend class PointerAnalysisVisitor; + + ConstraintGraph *CSG; + + std::vector ConstraintedSuccs; + + unsigned NumReferences = 0; + + Value *V; + +public: + unsigned Index; +}; + +/** + * @brief Struct representing a constraint. 
+ */ +struct Constraint { + ConstraintGraphNode *Dst; + ConstraintGraphNode *Src; + ConstraintKind Kind; + +public: + /** + * @brief Constructor for Constraint. + * + * @param CSDst The destination node. + * @param CSSrc The source node. + * @param CSKind The kind of constraint. + */ + Constraint(ConstraintGraphNode *CSDst, ConstraintGraphNode *CSSrc, + ConstraintKind CSKind) + : Dst(CSDst), Src(CSSrc), Kind(CSKind) {} + + bool operator==(const Constraint &Other) const { + return Src == Other.Src && Dst == Other.Dst && Kind == Other.Kind; + } + + bool operator<(const Constraint &Other) const { + if (Src->Index < Other.Src->Index) + return true; + if (Dst->Index < Other.Dst->Index) + return true; + return false; + } +}; + +/** + * @brief Class representing the constraint graph. + */ +class ConstraintGraph { + Module &M; + + using ConstraintNodeMap = + std::map>; + + ConstraintNodeMap ConstraintGraphNodes; + + ConstraintGraphNode *InitialConstraintNode; + + /** + * @brief Helper function to get or create a constraint node for + * initialization. + * + * @return A unique pointer to the created constraint node. + */ + std::unique_ptr createInitialConstraintNode(); + + // for debug purpose + std::vector Constraints; + +public: + /** + * @brief Constructor for ConstraintGraph. + * + * @param Module The module. + */ + explicit ConstraintGraph(Module &Module); + ~ConstraintGraph(); + + using iterator = ConstraintNodeMap::iterator; + using const_iterator = ConstraintNodeMap::const_iterator; + + inline iterator begin() { return ConstraintGraphNodes.begin(); } + inline iterator end() { return ConstraintGraphNodes.end(); } + inline const_iterator begin() const { return ConstraintGraphNodes.begin(); } + inline const_iterator end() const { return ConstraintGraphNodes.end(); } + + /** + * @brief Get the initial constraint node. + * + * @return The initial constraint node. 
+ */ + ConstraintGraphNode *getInitialConstraintNode() const { + return InitialConstraintNode; + } + + /** + * @brief Get or insert a constraint node. + * + * @param V The value. + * @return The constraint node. + */ + ConstraintGraphNode *getOrInsertConstraintNode(Value *V); + + /** + * @brief Get or insert a constraint. + * + * @param Dst The destination value. + * @param Src The source value. + * @param Kind The kind of constraint. + * @return The constraint. + */ + Constraint *getOrInsertConstraint(Value *Dst, Value *Src, + ConstraintKind Kind); + + ConstraintGraphNode *operator[](const Value *V) { + return ConstraintGraphNodes[V].get(); + } +}; + +template <> struct GraphTraits { + using NodeRef = const ConstraintGraphNode *; + using CSNPairTy = ConstraintGraphNode::ConstraintRecord; + using EdgeRef = const ConstraintGraphNode::ConstraintRecord; + + static NodeRef getEntryNode(const ConstraintGraphNode *CSN) { return CSN; } + static const ConstraintGraphNode *CSNGetValue(CSNPairTy P) { return P.first; } + + using ChildIteratorType = mapped_iterator; + using ChildEdgeIteratorType = ConstraintGraphNode::const_iterator; + + static ChildIteratorType child_begin(NodeRef N) { + return ChildIteratorType(N->begin(), &CSNGetValue); + } + + static ChildIteratorType child_end(NodeRef N) { + return ChildIteratorType(N->end(), &CSNGetValue); + } + + static ChildEdgeIteratorType child_edge_begin(NodeRef N) { + return N->begin(); + } + + static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); } + + static NodeRef edge_dest(EdgeRef E) { return E.first; } +}; + +template <> struct GraphTraits { + using PairTy = + std::pair>; + using NodeRef = const ConstraintGraphNode *; + using EdgeRef = const ConstraintGraphNode::ConstraintRecord; + + static NodeRef getEntryNode(const ConstraintGraph *CSG) { + return CSG->getInitialConstraintNode(); + } + + using nodes_iterator = ConstraintGraph::const_iterator; + + static nodes_iterator nodes_begin(const ConstraintGraph 
*CSG) { + return CSG->begin(); + } + + static nodes_iterator nodes_end(const ConstraintGraph *CSG) { + return CSG->end(); + } +}; + +template <> +struct GraphTraits + : public GraphTraits { + using PairTy = + std::pair>; + + static NodeRef getEntryNode(const ConstraintGraph *CSG) { + return CSG->getInitialConstraintNode(); + } + + using nodes_iterator = ConstraintGraph::const_iterator; + + static nodes_iterator nodes_begin(const ConstraintGraph *CSG) { + return CSG->begin(); + } + + static nodes_iterator nodes_end(const ConstraintGraph *CSG) { + return CSG->end(); + } +}; + +/** + * @struct PointerAnalysisCLIConfig + * @brief Configuration options for pointer analysis CLI. + */ +struct PointerAnalysisCLIConfig { + bool UseAnders; + bool UseDebug; +}; + +/// An analysis pass to compute the \c CallGraph for a \c Module using pointer +/// analysis. +/// +/// This class implements the concept of an analysis pass used by the \c +/// ModuleAnalysisManager to run an analysis over a module and cache the +/// resulting data. +class PACallGraphAnalysis : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + + static AnalysisKey Key; + + PointerAnalysisCLIConfig Config; + +public: + explicit PACallGraphAnalysis(PointerAnalysisCLIConfig Config) + : Config(Config) {} + + /// A formulaic type to inform clients of the result type. + using Result = CallGraph; + + /// Compute the \c CallGraph for the module \c M. + CallGraph run(Module &M, ModuleAnalysisManager &); +}; + +using PAAnalysisDataflowFacts = std::vector; + +/** + * @class PointerAnalysisVisitor + * @brief A visitor class for performing pointer analysis on a given module. + * + * This class inherits from the DataflowVisitor class + * and is responsible for performing pointer analysis on a given module. 
It + * maintains various data structures and maps to store information related to + * constraint graphs, call graphs, point-to sets, unresolved arguments, function + * return value point-to sets, constraint-function mappings, and a deque of + * constraints. + * + * The main functionality of this class includes solving constraints, merging + * dataflow facts, computing dataflow values for instructions, and transferring + * dataflow facts for load, store, and call instructions. It also provides + * methods for propagating constraints, solving specific types of constraints, + * and printing the constraint graph and point-to set map. + * + * @see DataflowVisitor + * @see PAAnalysisDataflowFacts + */ +class PointerAnalysisVisitor : public DataflowVisitor { + ConstraintGraph CSG; + CallGraph CG; + // [todo][optimize] use more efficient data structures + std::map> PointToSetMap; + std::map> + UnresolvedArgs; + std::map> + FunctionReturnValuePointToSetMap; + std::map ConstraintFunctionMap; + std::deque Constraints; + std::set CollectedConstraints; + +public: + friend class PACallGraphAnalysis; + PointerAnalysisVisitor(Module &Module); + void solveConstraint(); + +public: + void merge(PAAnalysisDataflowFacts *Facts, + const PAAnalysisDataflowFacts &OtherFacts) override; + void compDFVal(Instruction *Inst, PAAnalysisDataflowFacts *Dfval) override; + void transfer(LoadInst *Inst, PAAnalysisDataflowFacts *Dfval); + void transfer(StoreInst *Inst, PAAnalysisDataflowFacts *Dfval); + void transfer(CallInst *Inst, PAAnalysisDataflowFacts *Dfval); + + void propagate(const ConstraintGraphNode *Src); + void solveLoadConstraint(const Constraint *Cstrt); + void solveStoreConstraint(const Constraint *Cstrt); + void solveGetAddrConstraint(const Constraint *Cstrt); + void solveCopyConstraint(const Constraint *Cstrt); + void solveUnsolvedConstraint(const Constraint *Cstrt); + void printConstraintGraph(raw_ostream &OS); + void printPointToSetMap(raw_ostream &OS); +}; + +} // namespace llvm + 
+#endif diff --git a/llvm/tools/stackanalyzer/StackUsage.cpp b/llvm/tools/stackanalyzer/StackUsage.cpp new file mode 100644 index 000000000000..e49c2bb312c8 --- /dev/null +++ b/llvm/tools/stackanalyzer/StackUsage.cpp @@ -0,0 +1,134 @@ +//===--- StackUsage.cpp - Analyze the callgraph of a LLVM bitcode file using +// pointer analysis ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "StackUsage.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/VirtualFileSystem.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +void llvm::parseStackSizeFromSU( + StringRef Filename, llvm::Module &Module, + MapVector &StackSizeMap) { + + // Open the file and read its contents + auto BufferOrError = MemoryBuffer::getFile(Filename); + if (std::error_code EC = BufferOrError.getError()) { + errs() << "Error opening file " << Filename << ": " << EC.message() << "\n"; + return; + } + + std::unique_ptr Buffer = std::move(BufferOrError.get()); + StringRef Content = Buffer->getBuffer(); + + // Split the file content into lines + SmallVector Lines; + Content.split(Lines, '\n'); + + // Iterate through each line + for (StringRef Line : Lines) { + if (Line.trim().empty()) + continue; // Skip empty lines + + // Split the line by tabs + SmallVector Parts; + Line.split(Parts, '\t', -1, false); + + if (Parts.size() < 3) { + errs() << "Invalid format in line: " << Line << "\n"; + continue; + } + + // Extract the function name and stack size + StringRef FullFunctionName = Parts[0]; + StringRef StackSizeStr = Parts[1]; + + // Parse the stack size + unsigned StackSize; + if (StackSizeStr.getAsInteger(10, StackSize)) { + errs() << "Invalid stack size in line: " << Line << "\n"; + 
continue; + } + + // Extract the function name (remove path and extension) + StringRef FunctionName = sys::path::filename(FullFunctionName); + FunctionName = FunctionName.rsplit(':').second; + + // Find the corresponding function in the module + Function *F = Module.getFunction(FunctionName); + if (!F) { + errs() << "Function " << FunctionName << " not found in module\n"; + continue; + } + + // Insert the function and its stack size into the map + StackSizeMap[F] = StackSize; + } +} + +void StackOverflowDetector::analyze( + const CallGraph &CG, + const MapVector &StackSizes) { + for (const auto &Node : CG) { + Function *F = Node.second->getFunction(); + if (!F) + continue; + if (F->isDeclaration()) + continue; + if (!Visited.count(F)) { + if (F->getName() == "main") + dfs(F, CG, StackSizes); + } + } +} + +void StackOverflowDetector::printResults(raw_ostream &OS) const { + OS << "Stack Overflow Detector Results(Limit:" << Threshold << ")\n"; + for (const auto &Path : OverflowPaths) { + OS << "Path:\n"; + for (auto *F : Path) { + OS << " " << F->getName() << "\n"; + } + } +} + +bool StackOverflowDetector::dfs( + Function *F, const CallGraph &CG, + const MapVector &StackSizes) { + Visited.insert(F); + unsigned CurrentStackSize = StackSizes.lookup(F); + PathStack.insert({F, CurrentStackSize}); + unsigned CumulativeStackSize = 0; + for (auto &Entry : PathStack) { + CumulativeStackSize += Entry.second; + } + if (CumulativeStackSize > Threshold) { + std::vector Path; + for (auto &Entry : PathStack) { + Path.push_back(Entry.first); + } + OverflowPaths.push_back(Path); + return true; + } + + auto *CGNode = CG[F]; + for (auto &Callee : *CGNode) { + Function *CalleeF = Callee.second->getFunction(); + if (CalleeF && !Visited.count(CalleeF) && !CalleeF->isDeclaration()) { + if (dfs(CalleeF, CG, StackSizes)) { + return true; + } + } + } + + PathStack.pop_back(); + return false; +} \ No newline at end of file diff --git a/llvm/tools/stackanalyzer/StackUsage.h 
b/llvm/tools/stackanalyzer/StackUsage.h new file mode 100644 index 000000000000..067566766bec --- /dev/null +++ b/llvm/tools/stackanalyzer/StackUsage.h @@ -0,0 +1,70 @@ +//===--- StackUsage.h - Analyze the stack usage of functions inside a LLVM +// bitcode file ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_STACKUSAGE_H +#define LLVM_TOOLS_STACKUSAGE_H + +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include + +namespace llvm { +/** + * @brief Parses the stack size from the stack usage file. + * + * This function reads the stack usage information from the specified .su file + * and populates the provided map with the stack sizes for each function + * in the given module. + * + * @param Filename The name of the stack usage file(in .su format) to read. + * @param Module The LLVM module containing the functions. + * @param StackSizeMap A map to be populated with the stack sizes for each + * function. + */ +void parseStackSizeFromSU( + llvm::StringRef Filename, llvm::Module &Module, + llvm::MapVector &StackSizeMap); + +/** + * @class StackOverflowDetector + * @brief A class that detects stack overflow in a program. + * + * The StackOverflowDetector class analyzes the call graph of a program and + * detects potential stack overflow paths. It uses a depth-first search + * algorithm to traverse the call graph and keeps track of the stack sizes of + * each function. The class provides a method to analyze the call graph and + * print the results. + * + * @note This class assumes that the call graph and stack sizes have already + * been computed. 
+ */ +class StackOverflowDetector { + + SmallVector, 4> OverflowPaths; + MapVector PathStack; + std::set Visited; + unsigned Threshold; + + bool dfs(Function *F, const CallGraph &CG, + const MapVector &StackSizes); + +public: + StackOverflowDetector(unsigned Limit) : Threshold(Limit) {} + + void analyze(const CallGraph &CG, + const MapVector &); + + void printResults(raw_ostream &OS) const; +}; +} // namespace llvm + +#endif diff --git a/llvm/tools/stackanalyzer/stackanalyzer.cpp b/llvm/tools/stackanalyzer/stackanalyzer.cpp index add2272e8bfa..2fa502c7be1e 100644 --- a/llvm/tools/stackanalyzer/stackanalyzer.cpp +++ b/llvm/tools/stackanalyzer/stackanalyzer.cpp @@ -1,5 +1,7 @@ +#include "CallGraphGen.h" +#include "StackUsage.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" @@ -44,7 +46,7 @@ static cl::opt cl::cat(StackAnalyzerCategory)); static cl::opt InputSUFilename( "sufile", cl::desc("Input .su file to be analyzed, given the .bc file"), - cl::init("/tmp/output.su"), cl::cat(StackAnalyzerCategory)); + cl::init("./output.su"), cl::cat(StackAnalyzerCategory)); static cl::opt OutputFilename( "o", cl::desc("Output callgraph in .dot format with stack cost information"), @@ -71,12 +73,12 @@ int main(int argc, char **argv) { auto MB = ExitOnErr(openBitcodeFile(InputFilename)); auto M = ExitOnErr(parseBitcodeFile(MB->getMemBufferRef(), Context)); - // auto Config = PointerAnalysisCLIConfig{UseAnders, UseDebug}; + auto Config = PointerAnalysisCLIConfig{UseAnders, UseDebug}; ModuleAnalysisManager MAM; PassBuilder PB; PB.registerModuleAnalyses(MAM); - MAM.registerPass([/*Config*/] { return CallGraphAnalysis(); }); + MAM.registerPass([Config] { return PACallGraphAnalysis(Config); }); ModulePassManager MPM; MPM.addPass(RequireAnalysisPass()); MPM.run(*M, MAM); @@ -85,14 +87,13 @@ int main(int argc, char **argv) { 
for (auto &F : *M) { StackSize.insert(std::make_pair(&F, 0)); } - // writeModuleToFile(M.get()); - // parseStackSizeFromSU(InputSUFilename, *M, StackSize); + parseStackSizeFromSU(InputSUFilename, *M, StackSize); const auto &Graph = MAM.getResult(*M); if (UseCallGraph) { std::error_code EC; - raw_fd_ostream File("./test/callgraph/callgraph.dot", EC, sys::fs::OF_Text); + raw_fd_ostream File(OutputFilename, EC, sys::fs::OF_Text); if (!EC) { File << "digraph \"CallGraph\" {\n"; @@ -126,9 +127,9 @@ int main(int argc, char **argv) { } if (UseAnalysis) { - // StackOverflowDetector Detector{LimitSize}; - // Detector.analyze(Graph, StackSize); - // Detector.printResults(outs()); + StackOverflowDetector Detector{LimitSize}; + Detector.analyze(Graph, StackSize); + Detector.printResults(outs()); } return 0; diff --git a/llvm/utils/gn/secondary/llvm/tools/stackanalyzer/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/stackanalyzer/BUILD.gn index 373b12c8df1c..666fc0a009ec 100644 --- a/llvm/utils/gn/secondary/llvm/tools/stackanalyzer/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/stackanalyzer/BUILD.gn @@ -6,5 +6,9 @@ executable("stackanalyzer") { "//llvm/lib/Bitcode/Reader", "//llvm/lib/Support", ] - sources = [ "stackanalyzer.cpp" ] + sources = [ + "CallGraphGen.cpp", + "StackUsage.cpp", + "stackanalyzer.cpp", + ] } -- Gitee From 148ae24f1dfb104fcc74f5146f295b23dbcd248d Mon Sep 17 00:00:00 2001 From: yuqiuchu Date: Wed, 14 Aug 2024 09:44:45 +0800 Subject: [PATCH 3/6] chore: finish legacy code building --- llvm/tools/stackanalyzer/CMakeLists.txt | 1 + llvm/tools/stackanalyzer/StackUsage.cpp | 59 ++++++++++++++++++- llvm/tools/stackanalyzer/StackUsage.h | 12 ++++ .../llvm/tools/stackanalyzer/BUILD.gn | 1 + .../llvm-project-overlay/llvm/BUILD.bazel | 1 + 5 files changed, 73 insertions(+), 1 deletion(-) diff --git a/llvm/tools/stackanalyzer/CMakeLists.txt b/llvm/tools/stackanalyzer/CMakeLists.txt index f1d67e8e09e9..2f013759b3f2 100644 --- a/llvm/tools/stackanalyzer/CMakeLists.txt 
+++ b/llvm/tools/stackanalyzer/CMakeLists.txt @@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS Core Passes Support + ${LLVM_TARGETS_TO_BUILD} ) add_llvm_tool(stackanalyzer diff --git a/llvm/tools/stackanalyzer/StackUsage.cpp b/llvm/tools/stackanalyzer/StackUsage.cpp index e49c2bb312c8..aabec5e9c696 100644 --- a/llvm/tools/stackanalyzer/StackUsage.cpp +++ b/llvm/tools/stackanalyzer/StackUsage.cpp @@ -8,17 +8,26 @@ //===----------------------------------------------------------------------===// #include "StackUsage.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include using namespace llvm; +using namespace llvm::sys; void llvm::parseStackSizeFromSU( StringRef Filename, llvm::Module &Module, MapVector &StackSizeMap) { - + emitSUFile(Filename, Module); // Open the file and read its contents auto BufferOrError = MemoryBuffer::getFile(Filename); if (std::error_code EC = BufferOrError.getError()) { @@ -74,6 +83,54 @@ void llvm::parseStackSizeFromSU( } } +void llvm::emitSUFile(llvm::StringRef SUFilename, llvm::Module &Module) { + auto TargetTriple = getDefaultTargetTriple(); + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllAsmPrinters(); + + std::string Error; + auto *Target = TargetRegistry::lookupTarget(TargetTriple, Error); + if (!Target) { + errs() << "Error: " << Error << "\n"; + return; + } + + auto *CPU = "generic"; + auto *Features = ""; + + TargetOptions Opt; + Opt.StackUsageOutput = SUFilename; + auto RM = std::optional(); + auto *TargetMachine = + Target->createTargetMachine(TargetTriple, CPU, Features, Opt, RM); + 
+ Module.setDataLayout(TargetMachine->createDataLayout()); + Module.setTargetTriple(TargetTriple); + + auto *Filename = "/tmp/stackanalyzer/output.o"; + std::error_code EC; + raw_fd_ostream Dest(Filename, EC, sys::fs::OF_None); + + if (EC) { + errs() << "Error opening file: " << EC.message() << "\n"; + return; + } + + legacy::PassManager Pass; + auto FileType = CGFT_ObjectFile; + + if (TargetMachine->addPassesToEmitFile(Pass, Dest, nullptr, FileType)) { + errs() << "TargetMachine can't emit a file of this type"; + return; + } + + Pass.run(Module); + Dest.flush(); +} + void StackOverflowDetector::analyze( const CallGraph &CG, const MapVector &StackSizes) { diff --git a/llvm/tools/stackanalyzer/StackUsage.h b/llvm/tools/stackanalyzer/StackUsage.h index 067566766bec..561bd31e01f3 100644 --- a/llvm/tools/stackanalyzer/StackUsage.h +++ b/llvm/tools/stackanalyzer/StackUsage.h @@ -34,6 +34,18 @@ void parseStackSizeFromSU( llvm::StringRef Filename, llvm::Module &Module, llvm::MapVector &StackSizeMap); +/** + * @brief Emits a stack usage file for the given module. + * + * This function generates a stack usage file for the specified module. The + * stack usage file contains information about the stack usage of the functions + * in the module. + * + * @param Filename The path to the output file. + * @param Module The LLVM module for which the stack usage file is generated. + */ +void emitSUFile(llvm::StringRef Filename, llvm::Module &Module); + /** * @class StackOverflowDetector * @brief A class that detects stack overflow in a program. 
diff --git a/llvm/utils/gn/secondary/llvm/tools/stackanalyzer/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/stackanalyzer/BUILD.gn index 666fc0a009ec..ceaec3167f21 100644 --- a/llvm/utils/gn/secondary/llvm/tools/stackanalyzer/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/stackanalyzer/BUILD.gn @@ -5,6 +5,7 @@ executable("stackanalyzer") { "//llvm/lib/Passes", "//llvm/lib/Bitcode/Reader", "//llvm/lib/Support", + "//llvm/lib/Target:TargetsToBuild", ] sources = [ "CallGraphGen.cpp", diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 85d993d84a30..9ac4af7a2762 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -4719,6 +4719,7 @@ cc_binary( copts = llvm_copts, stamp = 0, deps = [ + ":AllTargetsCodeGens", ":Analysis", ":BitcodeReader", ":Core", -- Gitee From 6a4d3ef25d8f7fcb182e59c3bb56fabbd910cd57 Mon Sep 17 00:00:00 2001 From: yuqiuchu Date: Wed, 14 Aug 2024 18:20:54 +0800 Subject: [PATCH 4/6] fix: replace .o/.su output using relative path. 
--- llvm/tools/stackanalyzer/StackUsage.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/tools/stackanalyzer/StackUsage.cpp b/llvm/tools/stackanalyzer/StackUsage.cpp index aabec5e9c696..034b5899ff31 100644 --- a/llvm/tools/stackanalyzer/StackUsage.cpp +++ b/llvm/tools/stackanalyzer/StackUsage.cpp @@ -12,7 +12,6 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/MC/TargetRegistry.h" -#include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/VirtualFileSystem.h" @@ -25,8 +24,8 @@ using namespace llvm; using namespace llvm::sys; void llvm::parseStackSizeFromSU( - StringRef Filename, llvm::Module &Module, - MapVector &StackSizeMap) { + StringRef Filename, Module &Module, + MapVector &StackSizeMap) { emitSUFile(Filename, Module); // Open the file and read its contents auto BufferOrError = MemoryBuffer::getFile(Filename); @@ -110,7 +109,7 @@ void llvm::emitSUFile(llvm::StringRef SUFilename, llvm::Module &Module) { Module.setDataLayout(TargetMachine->createDataLayout()); Module.setTargetTriple(TargetTriple); - auto *Filename = "/tmp/stackanalyzer/output.o"; + auto *Filename = "./output.o"; std::error_code EC; raw_fd_ostream Dest(Filename, EC, sys::fs::OF_None); -- Gitee From bc4b3c24f8a64d930eb323a0a4799cb7cd06e311 Mon Sep 17 00:00:00 2001 From: yuqiuchu Date: Wed, 14 Aug 2024 18:26:18 +0800 Subject: [PATCH 5/6] fix: use PACallgraphAnalysis. 
--- llvm/tools/stackanalyzer/stackanalyzer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/stackanalyzer/stackanalyzer.cpp b/llvm/tools/stackanalyzer/stackanalyzer.cpp index 2fa502c7be1e..6e2790eeb448 100644 --- a/llvm/tools/stackanalyzer/stackanalyzer.cpp +++ b/llvm/tools/stackanalyzer/stackanalyzer.cpp @@ -89,7 +89,7 @@ int main(int argc, char **argv) { } parseStackSizeFromSU(InputSUFilename, *M, StackSize); - const auto &Graph = MAM.getResult(*M); + const auto &Graph = MAM.getResult(*M); if (UseCallGraph) { std::error_code EC; -- Gitee From a1639bd89f1385a9a46a00e28076eada7a1c4914 Mon Sep 17 00:00:00 2001 From: yuqiuchu Date: Thu, 15 Aug 2024 10:15:55 +0800 Subject: [PATCH 6/6] feat: support recursion. --- llvm/tools/stackanalyzer/CallGraphGen.cpp | 31 ++++++++++++-- llvm/tools/stackanalyzer/CallGraphGen.h | 3 ++ llvm/tools/stackanalyzer/StackUsage.cpp | 52 +++++++++++++++++------ llvm/tools/stackanalyzer/StackUsage.h | 5 ++- 4 files changed, 74 insertions(+), 17 deletions(-) diff --git a/llvm/tools/stackanalyzer/CallGraphGen.cpp b/llvm/tools/stackanalyzer/CallGraphGen.cpp index 4a2a72b83f18..3465be936b0a 100644 --- a/llvm/tools/stackanalyzer/CallGraphGen.cpp +++ b/llvm/tools/stackanalyzer/CallGraphGen.cpp @@ -77,6 +77,7 @@ PACallGraphAnalysis::Result PACallGraphAnalysis::run(Module &M, compForwardDataflow(&(*FI), &PAVisitor, &ResultFact, InitFact); PAVisitor.solveConstraint(); } + PAVisitor.canonicalizeCallGraph(); if (Config.UseDebug) { PAVisitor.printConstraintGraph(outs()); PAVisitor.printPointToSetMap(outs()); @@ -181,7 +182,6 @@ void PointerAnalysisVisitor::transfer(StoreInst *Inst, void PointerAnalysisVisitor::transfer(CallInst *Inst, PAAnalysisDataflowFacts *Dfval) { auto *Callee = Inst->getCalledFunction(); - auto *PrevFunction = CurrentFunction; if (!Callee) { auto *CalleeValue = Inst->getCalledOperand(); auto *Constraint = @@ -196,6 +196,7 @@ void PointerAnalysisVisitor::transfer(CallInst *Inst, 
Constraints.push_back(Constraint); ConstraintFunctionMap[Constraint] = CurrentFunction; } else { + auto *PrevFunction = CurrentFunction; if (Callee->isIntrinsic() || Callee->isDeclaration()) return; for (unsigned I = 0, NumOperands = Inst->arg_size(); I != NumOperands; @@ -327,11 +328,35 @@ void PointerAnalysisVisitor::solveUnsolvedConstraint(const Constraint *Cstrt) { } } +/** + * @brief Canonicalizes the call graph by removing redundant self-loops. + * + * This function iterates over all nodes in the call graph and checks if there + * are multiple self-loop edges (calls where a function calls itself). If more + * than one self-loop edge is found, the redundant edges are removed, leaving + * only one self-loop per node. + */ +void PointerAnalysisVisitor::canonicalizeCallGraph() { + for (auto &Node : CG) { + auto *CallGraphNode = Node.second.get(); + unsigned Cycles = 0; + for (auto CI = CallGraphNode->begin(), CE = CallGraphNode->end(); CI != CE; + CI++) { + auto *CallRecord = CI->second; + auto *Callee = CallRecord->getFunction(); + if (Callee == CallGraphNode->getFunction()) { + Cycles++; + if (Cycles > 1) + CallGraphNode->removeCallEdge(CI); + } + } + } +} + std::array(ConstraintKind::Init)> ConstraintKindToString = {"Copy", "GetAddr", "Load", "Store", "Unresolved"}; -/** - * @brief Prints the constraint graph. +/** * @brief Prints the constraint graph. * * This function prints the constraint graph in the DOT format. The graph * represents the constraints between different values in the analysis. 
The diff --git a/llvm/tools/stackanalyzer/CallGraphGen.h b/llvm/tools/stackanalyzer/CallGraphGen.h index 1583a3f6f023..8362985c8233 100644 --- a/llvm/tools/stackanalyzer/CallGraphGen.h +++ b/llvm/tools/stackanalyzer/CallGraphGen.h @@ -535,6 +535,9 @@ public: PointerAnalysisVisitor(Module &Module); void solveConstraint(); +private: + void canonicalizeCallGraph(); + public: void merge(PAAnalysisDataflowFacts *Facts, const PAAnalysisDataflowFacts &OtherFacts) override; diff --git a/llvm/tools/stackanalyzer/StackUsage.cpp b/llvm/tools/stackanalyzer/StackUsage.cpp index 034b5899ff31..3562fe70b3d4 100644 --- a/llvm/tools/stackanalyzer/StackUsage.cpp +++ b/llvm/tools/stackanalyzer/StackUsage.cpp @@ -141,7 +141,7 @@ void StackOverflowDetector::analyze( continue; if (!Visited.count(F)) { if (F->getName() == "main") - dfs(F, CG, StackSizes); + traverse(F, CG, StackSizes); } } } @@ -156,12 +156,7 @@ void StackOverflowDetector::printResults(raw_ostream &OS) const { } } -bool StackOverflowDetector::dfs( - Function *F, const CallGraph &CG, - const MapVector &StackSizes) { - Visited.insert(F); - unsigned CurrentStackSize = StackSizes.lookup(F); - PathStack.insert({F, CurrentStackSize}); +bool StackOverflowDetector::evaluateCurrentPath() { unsigned CumulativeStackSize = 0; for (auto &Entry : PathStack) { CumulativeStackSize += Entry.second; @@ -174,17 +169,50 @@ bool StackOverflowDetector::dfs( OverflowPaths.push_back(Path); return true; } + return false; +} +bool StackOverflowDetector::traverse( + Function *F, const CallGraph &CG, + const MapVector &StackSizes) { + // Check for loop detection: if we revisit a node that is in the PathStack, + // it's a loop + if (PathStack.count(F)) { + unsigned LoopStackSize = 0; + for (auto PI = PathStack.find(F), PE = PathStack.end(); PI != PE; ++PI) { + LoopStackSize += PI->second; + } + + // If the loop's stack cost is zero, treat it as a single node and evaluate + // current path + if (LoopStackSize == 0) { + return evaluateCurrentPath(); + 
} + // Otherwise, consider it a potential overflow path + std::vector Path; + for (auto &Entry : PathStack) { + Path.push_back(Entry.first); + } + OverflowPaths.push_back(Path); + return true; + } + + Visited.insert(F); + unsigned CurrentStackSize = StackSizes.lookup(F); + PathStack.insert({F, CurrentStackSize}); + if (evaluateCurrentPath()) { + return true; + } auto *CGNode = CG[F]; + + bool FindOverflowPath = false; for (auto &Callee : *CGNode) { Function *CalleeF = Callee.second->getFunction(); - if (CalleeF && !Visited.count(CalleeF) && !CalleeF->isDeclaration()) { - if (dfs(CalleeF, CG, StackSizes)) { - return true; - } + if (CalleeF && !CalleeF->isDeclaration()) { + FindOverflowPath = traverse(CalleeF, CG, StackSizes) || FindOverflowPath; } } PathStack.pop_back(); - return false; + return FindOverflowPath; } \ No newline at end of file diff --git a/llvm/tools/stackanalyzer/StackUsage.h b/llvm/tools/stackanalyzer/StackUsage.h index 561bd31e01f3..e1b3f31f62eb 100644 --- a/llvm/tools/stackanalyzer/StackUsage.h +++ b/llvm/tools/stackanalyzer/StackUsage.h @@ -66,8 +66,9 @@ class StackOverflowDetector { std::set Visited; unsigned Threshold; - bool dfs(Function *F, const CallGraph &CG, - const MapVector &StackSizes); + bool traverse(Function *F, const CallGraph &CG, + const MapVector &StackSizes); + bool evaluateCurrentPath(); public: StackOverflowDetector(unsigned Limit) : Threshold(Limit) {} -- Gitee