diff --git a/0001-Fix-trap-value-for-non-X86.patch b/0001-Fix-trap-value-for-non-X86.patch new file mode 100644 index 0000000000000000000000000000000000000000..83542e4a2372b0dafd3cff6cb91677e6335a9e1d --- /dev/null +++ b/0001-Fix-trap-value-for-non-X86.patch @@ -0,0 +1,126 @@ +From 868d8c360b3e1e5f291cb3e0dae0777a4529228f Mon Sep 17 00:00:00 2001 +From: Denis Revunov +Date: Thu, 27 Jul 2023 11:48:08 -0400 +Subject: [PATCH] Fix trap value for non-X86 + +The trap value used by BOLT was assumed to be single-byte instruction. +It made some functions unaligned on AArch64(e.g exceptions-instrumentation test) +and caused emission failures. Fix that by changing fill value to StringRef. + +Reviewed By: rafauler + +Differential Revision: https://reviews.llvm.org/D158191 +--- + bolt/include/bolt/Core/MCPlusBuilder.h | 9 ++++++--- + bolt/lib/Core/BinaryEmitter.cpp | 4 ++-- + bolt/lib/Rewrite/RewriteInstance.cpp | 6 ++++-- + bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 4 ++++ + bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp | 4 ++++ + bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 2 +- + 6 files changed, 21 insertions(+), 8 deletions(-) + +diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h +index 56d0228cd..beb06751d 100644 +--- a/bolt/include/bolt/Core/MCPlusBuilder.h ++++ b/bolt/include/bolt/Core/MCPlusBuilder.h +@@ -636,9 +636,12 @@ public: + return false; + } + +- /// If non-zero, this is used to fill the executable space with instructions +- /// that will trap. Defaults to 0. +- virtual unsigned getTrapFillValue() const { return 0; } ++ /// Used to fill the executable space with instructions ++ /// that will trap. ++ virtual StringRef getTrapFillValue() const { ++ llvm_unreachable("not implemented"); ++ return StringRef(); ++ } + + /// Interface and basic functionality of a MCInstMatcher. 
The idea is to make + /// it easy to match one or more MCInsts against a tree-like pattern and +diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp +index c4129615a..df076c81d 100644 +--- a/bolt/lib/Core/BinaryEmitter.cpp ++++ b/bolt/lib/Core/BinaryEmitter.cpp +@@ -376,7 +376,7 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, + } + + if (opts::MarkFuncs) +- Streamer.emitIntValue(BC.MIB->getTrapFillValue(), 1); ++ Streamer.emitBytes(BC.MIB->getTrapFillValue()); + + // Emit CFI end + if (Function.hasCFI()) +@@ -420,7 +420,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF, + // case, the call site entries in that LSDA have 0 as offset to the landing + // pad, which the runtime interprets as "no handler". To prevent this, + // insert some padding. +- Streamer.emitIntValue(BC.MIB->getTrapFillValue(), 1); ++ Streamer.emitBytes(BC.MIB->getTrapFillValue()); + } + + // Track the first emitted instruction with debug info. +diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp +index fe8c134b8..c6ea0b009 100644 +--- a/bolt/lib/Rewrite/RewriteInstance.cpp ++++ b/bolt/lib/Rewrite/RewriteInstance.cpp +@@ -5273,8 +5273,10 @@ void RewriteInstance::rewriteFile() { + if (!BF.getFileOffset() || !BF.isEmitted()) + continue; + OS.seek(BF.getFileOffset()); +- for (unsigned I = 0; I < BF.getMaxSize(); ++I) +- OS.write((unsigned char)BC->MIB->getTrapFillValue()); ++ StringRef TrapInstr = BC->MIB->getTrapFillValue(); ++ unsigned NInstr = BF.getMaxSize() / TrapInstr.size(); ++ for (unsigned I = 0; I < NInstr; ++I) ++ OS.write(TrapInstr.data(), TrapInstr.size()); + } + OS.seek(SavedPos); + } +diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +index acf21ba23..cd66b654e 100644 +--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp ++++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +@@ -1142,6 +1142,10 @@ public: + } + 
} + ++ StringRef getTrapFillValue() const override { ++ return StringRef("\0\0\0\0", 4); ++ } ++ + bool createReturn(MCInst &Inst) const override { + Inst.setOpcode(AArch64::RET); + Inst.clear(); +diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp +index ec5bca852..badc1bde8 100644 +--- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp ++++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp +@@ -171,6 +171,10 @@ public: + return true; + } + ++ StringRef getTrapFillValue() const override { ++ return StringRef("\0\0\0\0", 4); ++ } ++ + bool analyzeBranch(InstructionIterator Begin, InstructionIterator End, + const MCSymbol *&TBB, const MCSymbol *&FBB, + MCInst *&CondBranch, +diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +index 3ee161d0b..5e3c01a1c 100644 +--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp ++++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +@@ -397,7 +397,7 @@ public: + } + } + +- unsigned getTrapFillValue() const override { return 0xCC; } ++ StringRef getTrapFillValue() const override { return StringRef("\314", 1); } + + struct IndJmpMatcherFrag1 : MCInstMatcher { + std::unique_ptr Base; +-- +2.33.0 + diff --git a/0002-Add-test-for-emitting-trap-value.patch b/0002-Add-test-for-emitting-trap-value.patch new file mode 100644 index 0000000000000000000000000000000000000000..8cc1c6d8308dc848072e0b6be000f9fc12c96068 --- /dev/null +++ b/0002-Add-test-for-emitting-trap-value.patch @@ -0,0 +1,44 @@ +From e4ae238a42296a84bc819dd1fb61f3c699952f17 Mon Sep 17 00:00:00 2001 +From: Denis Revunov +Date: Thu, 17 Aug 2023 18:30:07 +0300 +Subject: [PATCH] Add test for emitting trap value + +Reviewed By: rafauler + +Differential Revision: https://reviews.llvm.org/D158191 +--- + bolt/test/runtime/mark-funcs.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + create mode 100644 bolt/test/runtime/mark-funcs.c + +diff --git a/bolt/test/runtime/mark-funcs.c 
b/bolt/test/runtime/mark-funcs.c +new file mode 100644 +index 000000000..a8586ca8b +--- /dev/null ++++ b/bolt/test/runtime/mark-funcs.c +@@ -0,0 +1,22 @@ ++#include ++ ++int dummy() { ++ printf("Dummy called\n"); ++ return 0; ++} ++ ++int main(int argc, char **argv) { ++ if (dummy() != 0) ++ return 1; ++ printf("Main called\n"); ++ return 0; ++} ++// Check that emitting trap value works properly and ++// does not break functions ++// REQUIRES: system-linux ++// RUN: %clangxx -Wl,-q %s -o %t.exe ++// RUN: %t.exe | FileCheck %s ++// CHECK: Dummy called ++// CHECK-NEXT: Main called ++// RUN: llvm-bolt %t.exe -o %t.exe.bolt -lite=false --mark-funcs ++// RUN: %t.exe.bolt | FileCheck %s +-- +2.33.0 + diff --git a/0003-AArch64-Add-AArch64-support-for-inline.patch b/0003-AArch64-Add-AArch64-support-for-inline.patch new file mode 100644 index 0000000000000000000000000000000000000000..cb64595fbbeddb127f769c8facf7676b1178cdf9 --- /dev/null +++ b/0003-AArch64-Add-AArch64-support-for-inline.patch @@ -0,0 +1,274 @@ +From a09ea2c3534d12f194f740180e09a229e0b2200f Mon Sep 17 00:00:00 2001 +From: xiongzhou4 +Date: Wed, 12 Jun 2024 17:12:36 +0800 +Subject: [PATCH 1/2] [AArch64] Add AArch64 support for inline. 
+ +--- + bolt/include/bolt/Core/MCPlusBuilder.h | 5 +-- + bolt/lib/Passes/Inliner.cpp | 31 +++++++++++++++++++ + .../Target/AArch64/AArch64MCPlusBuilder.cpp | 10 ++++++ + bolt/test/AArch64/Inputs/inline-foo.c | 5 +++ + bolt/test/AArch64/Inputs/inline-main.c | 5 +++ + bolt/test/AArch64/Inputs/inlined.cpp | 23 ++++++++++++++ + bolt/test/AArch64/Inputs/inlinee.cpp | 3 ++ + bolt/test/AArch64/Inputs/jmp_opt.cpp | 7 +++++ + bolt/test/AArch64/Inputs/jmp_opt2.cpp | 3 ++ + bolt/test/AArch64/Inputs/jmp_opt3.cpp | 3 ++ + bolt/test/AArch64/inline-debug-info.test | 20 ++++++++++++ + bolt/test/AArch64/inlined-function-mixed.test | 11 +++++++ + bolt/test/AArch64/jmp-optimization.test | 14 +++++++++ + 13 files changed, 136 insertions(+), 4 deletions(-) + create mode 100644 bolt/test/AArch64/Inputs/inline-foo.c + create mode 100644 bolt/test/AArch64/Inputs/inline-main.c + create mode 100644 bolt/test/AArch64/Inputs/inlined.cpp + create mode 100644 bolt/test/AArch64/Inputs/inlinee.cpp + create mode 100644 bolt/test/AArch64/Inputs/jmp_opt.cpp + create mode 100644 bolt/test/AArch64/Inputs/jmp_opt2.cpp + create mode 100644 bolt/test/AArch64/Inputs/jmp_opt3.cpp + create mode 100644 bolt/test/AArch64/inline-debug-info.test + create mode 100644 bolt/test/AArch64/inlined-function-mixed.test + create mode 100644 bolt/test/AArch64/jmp-optimization.test + +diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h +index db3f7e7f1..56d0228cd 100644 +--- a/bolt/include/bolt/Core/MCPlusBuilder.h ++++ b/bolt/include/bolt/Core/MCPlusBuilder.h +@@ -573,10 +573,7 @@ public: + return 0; + } + +- virtual bool isPush(const MCInst &Inst) const { +- llvm_unreachable("not implemented"); +- return false; +- } ++ virtual bool isPush(const MCInst &Inst) const { return false; } + + /// Return the width, in bytes, of the memory access performed by \p Inst, if + /// this is a push instruction. Return zero otherwise. 
+diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp +index 8dcb8934f..67dd294fb 100644 +--- a/bolt/lib/Passes/Inliner.cpp ++++ b/bolt/lib/Passes/Inliner.cpp +@@ -465,6 +465,37 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) { + << ". Size change: " << SizeAfterInlining + << " bytes.\n"); + ++// Skip situations where some A64 instructions can't be inlined: ++// # Indirect branch, e.g., BR. ++// # Branch instructions but used to make a function call. ++ if (BC.isAArch64()) { ++ auto &MIB = *BC.MIB; ++ bool skip = false; ++ for (const BinaryBasicBlock &BB : *TargetFunction) { ++ for (MCInst Inst : BB) { ++ if (MIB.isPseudo(Inst)) ++ continue; ++ ++ MIB.stripAnnotations(Inst, false); ++ ++ if (MIB.isBranch(Inst)) { ++ const BinaryBasicBlock *TargetBB = ++ TargetFunction->getBasicBlockForLabel(MIB.getTargetSymbol(Inst)); ++ if (MIB.isIndirectBranch(Inst) || !TargetBB) { ++ skip = true; ++ break; ++ } ++ } ++ } ++ if (skip) ++ break; ++ } ++ if (skip) { ++ ++InstIt; ++ continue; ++ } ++ } ++ + std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction); + + DidInlining = true; +diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +index d109a5d52..acf21ba23 100644 +--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp ++++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +@@ -34,6 +34,8 @@ public: + const MCRegisterInfo *RegInfo) + : MCPlusBuilder(Analysis, Info, RegInfo) {} + ++ MCPhysReg getStackPointer() const override { return AArch64::SP; } ++ + bool equals(const MCTargetExpr &A, const MCTargetExpr &B, + CompFuncTy Comp) const override { + const auto &AArch64ExprA = cast(A); +@@ -816,6 +818,14 @@ public: + + int getUncondBranchEncodingSize() const override { return 28; } + ++ bool createCall(MCInst &Inst, const MCSymbol *Target, ++ MCContext *Ctx) override { ++ Inst.setOpcode(AArch64::BL); ++ Inst.addOperand(MCOperand::createExpr( ++ 
MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx))); ++ return true; ++ } ++ + bool createTailCall(MCInst &Inst, const MCSymbol *Target, + MCContext *Ctx) override { + Inst.setOpcode(AArch64::B); +diff --git a/bolt/test/AArch64/Inputs/inline-foo.c b/bolt/test/AArch64/Inputs/inline-foo.c +new file mode 100644 +index 000000000..1307c13f2 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/inline-foo.c +@@ -0,0 +1,5 @@ ++#include "stub.h" ++ ++void foo() { ++ puts("Hello world!\n"); ++} +diff --git a/bolt/test/AArch64/Inputs/inline-main.c b/bolt/test/AArch64/Inputs/inline-main.c +new file mode 100644 +index 000000000..7853d2b63 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/inline-main.c +@@ -0,0 +1,5 @@ ++extern void foo(); ++int main() { ++ foo(); ++ return 0; ++} +diff --git a/bolt/test/AArch64/Inputs/inlined.cpp b/bolt/test/AArch64/Inputs/inlined.cpp +new file mode 100644 +index 000000000..a6ff9e262 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/inlined.cpp +@@ -0,0 +1,23 @@ ++extern "C" int printf(const char*, ...); ++extern const char* question(); ++ ++inline int answer() __attribute__((always_inline)); ++inline int answer() { return 42; } ++ ++int main(int argc, char *argv[]) { ++ int ans; ++ if (argc == 1) { ++ ans = 0; ++ } else { ++ ans = argc; ++ } ++ printf("%s\n", question()); ++ for (int i = 0; i < 10; ++i) { ++ int x = answer(); ++ int y = answer(); ++ ans += x - y; ++ } ++ // padding to make sure question() is inlineable ++ asm("nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;"); ++ return ans; ++} +diff --git a/bolt/test/AArch64/Inputs/inlinee.cpp b/bolt/test/AArch64/Inputs/inlinee.cpp +new file mode 100644 +index 000000000..edb7ab145 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/inlinee.cpp +@@ -0,0 +1,3 @@ ++const char* question() { ++ return "What do you get if you multiply six by nine?"; ++} +diff --git a/bolt/test/AArch64/Inputs/jmp_opt.cpp b/bolt/test/AArch64/Inputs/jmp_opt.cpp +new file mode 100644 +index 
000000000..cd6d53c35 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/jmp_opt.cpp +@@ -0,0 +1,7 @@ ++int g(); ++ ++int main() { ++ int x = g(); ++ int y = x*x; ++ return y; ++} +diff --git a/bolt/test/AArch64/Inputs/jmp_opt2.cpp b/bolt/test/AArch64/Inputs/jmp_opt2.cpp +new file mode 100644 +index 000000000..80b853d63 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/jmp_opt2.cpp +@@ -0,0 +1,3 @@ ++int f() { ++ return 0; ++} +diff --git a/bolt/test/AArch64/Inputs/jmp_opt3.cpp b/bolt/test/AArch64/Inputs/jmp_opt3.cpp +new file mode 100644 +index 000000000..7fb551163 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/jmp_opt3.cpp +@@ -0,0 +1,3 @@ ++int f(); ++ ++int g() { return f(); } +diff --git a/bolt/test/AArch64/inline-debug-info.test b/bolt/test/AArch64/inline-debug-info.test +new file mode 100644 +index 000000000..e20e5e31e +--- /dev/null ++++ b/bolt/test/AArch64/inline-debug-info.test +@@ -0,0 +1,20 @@ ++## Check that BOLT correctly prints and updates debug info for inlined ++## functions. ++ ++# REQUIRES: system-linux ++ ++# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \ ++# RUN: -I%p/../Inputs -o %t.exe -Wl,-q ++# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \ ++# RUN: --print-only=main --print-after-lowering --force-inline=foo \ ++# RUN: -o %t.bolt \ ++# RUN: | FileCheck %s ++ ++## The call to puts() should come from inline-foo.c: ++# CHECK: callq {{.*}} # debug line {{.*}}inline-foo.c:4:3 ++ ++# RUN: llvm-objdump --disassemble-symbols=main -d --line-numbers %t.bolt \ ++# RUN: | FileCheck %s -check-prefix=CHECK-OBJDUMP ++ ++## Dump of main() should include debug info from inline-foo.c after inlining: ++# CHECK-OBJDUMP: inline-foo.c:4 +diff --git a/bolt/test/AArch64/inlined-function-mixed.test b/bolt/test/AArch64/inlined-function-mixed.test +new file mode 100644 +index 000000000..5a87bdde9 +--- /dev/null ++++ b/bolt/test/AArch64/inlined-function-mixed.test +@@ -0,0 +1,11 @@ ++# Make sure inlining from a unit with debug 
info into unit without ++# debug info does not cause a crash. ++ ++RUN: %clangxx %cxxflags %S/Inputs/inlined.cpp -c -o %T/inlined.o ++RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g ++RUN: %clangxx %cxxflags %T/inlined.o %T/inlinee.o -o %t ++ ++RUN: llvm-bolt %t -o %t.bolt --update-debug-sections --reorder-blocks=reverse \ ++RUN: --inline-small-functions --force-inline=main | FileCheck %s ++ ++CHECK-NOT: BOLT: 0 out of {{.*}} functions were overwritten +diff --git a/bolt/test/AArch64/jmp-optimization.test b/bolt/test/AArch64/jmp-optimization.test +new file mode 100644 +index 000000000..92f4b9a14 +--- /dev/null ++++ b/bolt/test/AArch64/jmp-optimization.test +@@ -0,0 +1,14 @@ ++# Tests the optimization of functions that just do a tail call in the beginning. ++ ++# This test has commands that rely on shell capabilities that won't execute ++# correctly on Windows e.g. unsupported parameter expansion ++REQUIRES: shell ++ ++RUN: %clang %cflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t ++RUN: llvm-bolt -inline-small-functions %t -o %t.bolt ++RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s ++ ++CHECK:
: ++CHECK-NOT: call ++CHECK: xorl %eax, %eax ++CHECK: retq +-- +2.33.0 + diff --git a/0004-Bolt-Solving-pie-support-issue.patch b/0004-Bolt-Solving-pie-support-issue.patch new file mode 100644 index 0000000000000000000000000000000000000000..b26d9fcdf1eb2fbfc4f02094a06ffcf3ea1fae2c --- /dev/null +++ b/0004-Bolt-Solving-pie-support-issue.patch @@ -0,0 +1,170 @@ +From a28084a4adff2340dd02c2c0c42f4997f76b3ffa Mon Sep 17 00:00:00 2001 +From: rfwang07 +Date: Fri, 21 Jun 2024 11:16:44 +0800 +Subject: [PATCH] [Bolt] Solving pie support issue + +--- + bolt/lib/Core/BinaryContext.cpp | 25 +++++++++++++++++++---- + bolt/test/perf2bolt/Inputs/perf_test.c | 26 ++++++++++++++++++++++++ + bolt/test/perf2bolt/Inputs/perf_test.lds | 13 ++++++++++++ + bolt/test/perf2bolt/lit.local.cfg | 4 ++++ + bolt/test/perf2bolt/perf_test.test | 17 ++++++++++++++++ + bolt/unittests/Core/BinaryContext.cpp | 21 +++++++++++++++++++ + 6 files changed, 102 insertions(+), 4 deletions(-) + create mode 100644 bolt/test/perf2bolt/Inputs/perf_test.c + create mode 100644 bolt/test/perf2bolt/Inputs/perf_test.lds + create mode 100644 bolt/test/perf2bolt/lit.local.cfg + create mode 100644 bolt/test/perf2bolt/perf_test.test + +diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp +index 2d2b35ee2..ab9f0b844 100644 +--- a/bolt/lib/Core/BinaryContext.cpp ++++ b/bolt/lib/Core/BinaryContext.cpp +@@ -1880,10 +1880,27 @@ BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, + // Find a segment with a matching file offset. + for (auto &KV : SegmentMapInfo) { + const SegmentInfo &SegInfo = KV.second; +- if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) { +- // Use segment's aligned memory offset to calculate the base address. +- const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment); +- return MMapAddress - MemOffset; ++ // FileOffset is got from perf event, ++ // and it is equal to alignDown(SegInfo.FileOffset, pagesize). 
++ // If the pagesize is not equal to SegInfo.Alignment, ++ // FileOffset and SegInfo.FileOffset should be aligned first, ++ // and then judge whether they are equal. ++ if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == ++ alignDown(FileOffset, SegInfo.Alignment)) { ++ // The function's offset from base address in VAS is aligned by pagesize ++ // instead of SegInfo.Alignment. Pagesize can't be got from perf events. ++ // However, the ELF document says that SegInfo.FileOffset should equal ++ // to SegInfo.Address, modulo the pagesize. ++ // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf ++ ++ // So alignDown(SegInfo.Address, pagesize) can be calculated by: ++ // alignDown(SegInfo.Address, pagesize) ++ // = SegInfo.Address - (SegInfo.Address % pagesize) ++ // = SegInfo.Address - (SegInfo.FileOffset % pagesize) ++ // = SegInfo.Address - SegInfo.FileOffset + ++ // alignDown(SegInfo.FileOffset, pagesize) ++ // = SegInfo.Address - SegInfo.FileOffset + FileOffset ++ return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset); + } + } + +diff --git a/bolt/test/perf2bolt/Inputs/perf_test.c b/bolt/test/perf2bolt/Inputs/perf_test.c +new file mode 100644 +index 000000000..ff5ecf7a8 +--- /dev/null ++++ b/bolt/test/perf2bolt/Inputs/perf_test.c +@@ -0,0 +1,26 @@ ++#include ++#include ++#include ++ ++int add(int a, int b) { return a + b; } ++int minus(int a, int b) { return a - b; } ++int multiple(int a, int b) { return a * b; } ++int divide(int a, int b) { ++ if (b == 0) ++ return 0; ++ return a / b; ++} ++ ++int main() { ++ int a = 16; ++ int b = 8; ++ ++ for (int i = 1; i < 100000; i++) { ++ add(a, b); ++ minus(a, b); ++ multiple(a, b); ++ divide(a, b); ++ } ++ ++ return 0; ++} +diff --git a/bolt/test/perf2bolt/Inputs/perf_test.lds b/bolt/test/perf2bolt/Inputs/perf_test.lds +new file mode 100644 +index 000000000..9cb4ebbf1 +--- /dev/null ++++ b/bolt/test/perf2bolt/Inputs/perf_test.lds +@@ -0,0 +1,13 @@ ++SECTIONS { ++ . 
= SIZEOF_HEADERS; ++ .interp : { *(.interp) } ++ .note.gnu.build-id : { *(.note.gnu.build-id) } ++ . = 0x212e8; ++ .dynsym : { *(.dynsym) } ++ . = 0x31860; ++ .text : { *(.text*) } ++ . = 0x41c20; ++ .fini_array : { *(.fini_array) } ++ . = 0x54e18; ++ .data : { *(.data) } ++} +diff --git a/bolt/test/perf2bolt/lit.local.cfg b/bolt/test/perf2bolt/lit.local.cfg +new file mode 100644 +index 000000000..87a96ec34 +--- /dev/null ++++ b/bolt/test/perf2bolt/lit.local.cfg +@@ -0,0 +1,4 @@ ++import shutil ++ ++if shutil.which("perf") != None: ++ config.available_features.add("perf") +diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test +new file mode 100644 +index 000000000..fe6e015ab +--- /dev/null ++++ b/bolt/test/perf2bolt/perf_test.test +@@ -0,0 +1,17 @@ ++# Check perf2bolt binary function which was compiled with pie ++ ++REQUIRES: system-linux, perf ++ ++RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -Wl,--script=%S/Inputs/perf_test.lds -o %t ++RUN: perf record -e cycles:u -o %t2 -- %t ++RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s ++ ++CHECK-NOT: PERF2BOLT-ERROR ++CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection. ++ ++RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4 ++RUN: perf record -e cycles:u -o %t5 -- %t4 ++RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id 2>&1 | FileCheck %s --check-prefix=CHECK-NO-PIE ++ ++CHECK-NO-PIE-NOT: PERF2BOLT-ERROR ++CHECK-NO-PIE-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection. 
+diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp +index bac264141..5a80cb4a2 100644 +--- a/bolt/unittests/Core/BinaryContext.cpp ++++ b/bolt/unittests/Core/BinaryContext.cpp +@@ -83,3 +83,24 @@ TEST_P(BinaryContextTester, BaseAddress) { + BaseAddress = BC->getBaseAddressForMapping(0x7f13f5556000, 0x137a000); + ASSERT_FALSE(BaseAddress.has_value()); + } ++ ++TEST_P(BinaryContextTester, BaseAddress2) { ++ // Check that base address calculation is correct for a binary if the ++ // alignment in ELF file are different from pagesize. ++ // The segment layout is as follows: ++ BC->SegmentMapInfo[0] = SegmentInfo{0, 0x2177c, 0, 0x2177c, 0x10000}; ++ BC->SegmentMapInfo[0x31860] = ++ SegmentInfo{0x31860, 0x370, 0x21860, 0x370, 0x10000}; ++ BC->SegmentMapInfo[0x41c20] = ++ SegmentInfo{0x41c20, 0x1f8, 0x21c20, 0x1f8, 0x10000}; ++ BC->SegmentMapInfo[0x54e18] = ++ SegmentInfo{0x54e18, 0x51, 0x24e18, 0x51, 0x10000}; ++ ++ std::optional BaseAddress = ++ BC->getBaseAddressForMapping(0xaaaaea444000, 0x21000); ++ ASSERT_TRUE(BaseAddress.has_value()); ++ ASSERT_EQ(*BaseAddress, 0xaaaaea413000ULL); ++ ++ BaseAddress = BC->getBaseAddressForMapping(0xaaaaea444000, 0x11000); ++ ASSERT_FALSE(BaseAddress.has_value()); ++} +-- +2.39.2 (Apple Git-143) + diff --git a/0005-BOLT-AArch64-Don-t-change-layout-in-PatchEntries.patch b/0005-BOLT-AArch64-Don-t-change-layout-in-PatchEntries.patch new file mode 100644 index 0000000000000000000000000000000000000000..eda8d214b70aac572d487ff8a70e5233955d4ba5 --- /dev/null +++ b/0005-BOLT-AArch64-Don-t-change-layout-in-PatchEntries.patch @@ -0,0 +1,130 @@ +From 28e7e71251dc4b79c29aa0d4904cb424f9081455 Mon Sep 17 00:00:00 2001 +From: rfwang07 +Date: Fri, 21 Jun 2024 11:23:42 +0800 +Subject: [PATCH] [BOLT][AArch64] Don't change layout in PatchEntries + +--- + bolt/lib/Passes/PatchEntries.cpp | 11 ++++++++ + bolt/test/AArch64/patch-entries.s | 36 ++++++++++++++++++++++++ + bolt/unittests/Core/BinaryContext.cpp | 40 
+++++++++++++++++++++++++++ + 3 files changed, 87 insertions(+) + create mode 100644 bolt/test/AArch64/patch-entries.s + +diff --git a/bolt/lib/Passes/PatchEntries.cpp b/bolt/lib/Passes/PatchEntries.cpp +index 02a044d8b..ee7512d89 100644 +--- a/bolt/lib/Passes/PatchEntries.cpp ++++ b/bolt/lib/Passes/PatchEntries.cpp +@@ -98,6 +98,17 @@ void PatchEntries::runOnFunctions(BinaryContext &BC) { + }); + + if (!Success) { ++ // We can't change output layout for AArch64 due to LongJmp pass ++ if (BC.isAArch64()) { ++ if (opts::ForcePatch) { ++ errs() << "BOLT-ERROR: unable to patch entries in " << Function ++ << "\n"; ++ exit(1); ++ } ++ ++ continue; ++ } ++ + // If the original function entries cannot be patched, then we cannot + // safely emit new function body. + errs() << "BOLT-WARNING: failed to patch entries in " << Function +diff --git a/bolt/test/AArch64/patch-entries.s b/bolt/test/AArch64/patch-entries.s +new file mode 100644 +index 000000000..cf6f72a0b +--- /dev/null ++++ b/bolt/test/AArch64/patch-entries.s +@@ -0,0 +1,36 @@ ++# This test checks patch entries functionality ++ ++# REQUIRES: system-linux ++ ++# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \ ++# RUN: %s -o %t.o ++# RUN: %clang %cflags -pie %t.o -o %t.exe -nostdlib -Wl,-q ++# RUN: llvm-bolt %t.exe -o %t.bolt --use-old-text=0 --lite=0 --skip-funcs=_start ++# RUN: llvm-objdump -dz %t.bolt | FileCheck %s ++ ++# CHECK: : ++# CHECK-NEXT: adrp x16, 0x[[#%x,ADRP:]] ++# CHECK-NEXT: add x16, x16, #0x[[#%x,ADD:]] ++# CHECK-NEXT: br x16 ++ ++# CHECK: [[#ADRP + ADD]] : ++# CHECK-NEXT: [[#ADRP + ADD]]: {{.*}} ret ++ ++.text ++.balign 4 ++.global pathedEntries ++.type pathedEntries, %function ++pathedEntries: ++ .rept 32 ++ nop ++ .endr ++ ret ++.size pathedEntries, .-pathedEntries ++ ++.global _start ++.type _start, %function ++_start: ++ bl pathedEntries ++ .inst 0xdeadbeef ++ ret ++.size _start, .-_start +diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp 
+index 5a80cb4a2..7ac1c1435 100644 +--- a/bolt/unittests/Core/BinaryContext.cpp ++++ b/bolt/unittests/Core/BinaryContext.cpp +@@ -62,6 +62,46 @@ INSTANTIATE_TEST_SUITE_P(X86, BinaryContextTester, + INSTANTIATE_TEST_SUITE_P(AArch64, BinaryContextTester, + ::testing::Values(Triple::aarch64)); + ++TEST_P(BinaryContextTester, FlushPendingRelocCALL26) { ++ if (GetParam() != Triple::aarch64) ++ GTEST_SKIP(); ++ ++ // This test checks that encodeValueAArch64 used by flushPendingRelocations ++ // returns correctly encoded values for CALL26 relocation for both backward ++ // and forward branches. ++ // ++ // The offsets layout is: ++ // 4: func1 ++ // 8: bl func1 ++ // 12: bl func2 ++ // 16: func2 ++ ++ char Data[20] = {}; ++ BinarySection &BS = BC->registerOrUpdateSection( ++ ".text", ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC, ++ (uint8_t *)Data, sizeof(Data), 4); ++ MCSymbol *RelSymbol1 = BC->getOrCreateGlobalSymbol(4, "Func1"); ++ ASSERT_TRUE(RelSymbol1); ++ BS.addRelocation(8, RelSymbol1, ELF::R_AARCH64_CALL26, 0, 0, true); ++ MCSymbol *RelSymbol2 = BC->getOrCreateGlobalSymbol(16, "Func2"); ++ ASSERT_TRUE(RelSymbol2); ++ BS.addRelocation(12, RelSymbol2, ELF::R_AARCH64_CALL26, 0, 0, true); ++ ++ std::error_code EC; ++ SmallVector Vect(sizeof(Data)); ++ raw_svector_ostream OS(Vect); ++ ++ BS.flushPendingRelocations(OS, [&](const MCSymbol *S) { ++ return S == RelSymbol1 ? 4 : S == RelSymbol2 ? 
16 : 0; ++ }); ++ ++ const uint8_t Func1Call[4] = {255, 255, 255, 151}; ++ const uint8_t Func2Call[4] = {1, 0, 0, 148}; ++ ++ EXPECT_FALSE(memcmp(Func1Call, &Vect[8], 4)) << "Wrong backward call value\n"; ++ EXPECT_FALSE(memcmp(Func2Call, &Vect[12], 4)) << "Wrong forward call value\n"; ++} ++ + #endif + + TEST_P(BinaryContextTester, BaseAddress) { +-- +2.39.2 (Apple Git-143) + diff --git a/0006-AArch64-Add-CFG-block-count-correction-optimization.patch b/0006-AArch64-Add-CFG-block-count-correction-optimization.patch new file mode 100644 index 0000000000000000000000000000000000000000..b90b76d9461c27694516afc455562b5889485b0b --- /dev/null +++ b/0006-AArch64-Add-CFG-block-count-correction-optimization.patch @@ -0,0 +1,1820 @@ +From 25c9e9c7d4532f6e8962a25c5c7087bf3e3b8445 Mon Sep 17 00:00:00 2001 +From: rfwang07 +Date: Thu, 25 Jul 2024 14:45:53 +0800 +Subject: [PATCH] Add CFG block count correction optimization. + +--- + bolt/include/bolt/Core/BinaryBasicBlock.h | 59 +- + .../bolt/Core/BinaryBasicBlockFeature.h | 268 ++++++++ + bolt/include/bolt/Passes/FeatureMiner.h | 176 ++++++ + bolt/include/bolt/Passes/StaticBranchInfo.h | 108 ++++ + bolt/include/bolt/Profile/DataReader.h | 93 ++- + bolt/lib/Core/BinaryBasicBlockFeature.cpp | 21 + + bolt/lib/Core/CMakeLists.txt | 1 + + bolt/lib/Passes/CMakeLists.txt | 2 + + bolt/lib/Passes/FeatureMiner.cpp | 572 ++++++++++++++++++ + bolt/lib/Passes/StaticBranchInfo.cpp | 143 +++++ + bolt/lib/Profile/DataReader.cpp | 120 +++- + bolt/lib/Rewrite/RewriteInstance.cpp | 6 + + 12 files changed, 1557 insertions(+), 12 deletions(-) + create mode 100644 bolt/include/bolt/Core/BinaryBasicBlockFeature.h + create mode 100644 bolt/include/bolt/Passes/FeatureMiner.h + create mode 100644 bolt/include/bolt/Passes/StaticBranchInfo.h + create mode 100644 bolt/lib/Core/BinaryBasicBlockFeature.cpp + create mode 100644 bolt/lib/Passes/FeatureMiner.cpp + create mode 100644 bolt/lib/Passes/StaticBranchInfo.cpp + +diff --git 
a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h +index 02be9c1d4..a39d38d6b 100644 +--- a/bolt/include/bolt/Core/BinaryBasicBlock.h ++++ b/bolt/include/bolt/Core/BinaryBasicBlock.h +@@ -15,6 +15,7 @@ + #ifndef BOLT_CORE_BINARY_BASIC_BLOCK_H + #define BOLT_CORE_BINARY_BASIC_BLOCK_H + ++#include "bolt/Core/BinaryBasicBlockFeature.h" + #include "bolt/Core/FunctionLayout.h" + #include "bolt/Core/MCPlus.h" + #include "llvm/ADT/GraphTraits.h" +@@ -25,6 +26,7 @@ + #include "llvm/Support/raw_ostream.h" + #include + #include ++#include + + namespace llvm { + class MCCodeEmitter; +@@ -147,6 +149,12 @@ private: + /// Last computed hash value. + mutable uint64_t Hash{0}; + ++ std::set ChildrenSet; ++ ++ std::set ParentSet; ++ ++ BinaryBasicBlockFeature BlockFeatures; ++ + private: + BinaryBasicBlock() = delete; + BinaryBasicBlock(const BinaryBasicBlock &) = delete; +@@ -385,11 +393,14 @@ public: + /// If the basic block ends with a conditional branch (possibly followed by + /// an unconditional branch) and thus has 2 successors, return a successor + /// corresponding to a jump condition which could be true or false. +- /// Return nullptr if the basic block does not have a conditional jump. ++ /// Return the only successor if it's followed by an unconditional branch. ++ /// Return nullptr otherwise. + BinaryBasicBlock *getConditionalSuccessor(bool Condition) { +- if (succ_size() != 2) +- return nullptr; +- return Successors[Condition == true ? 0 : 1]; ++ if (succ_size() == 2) ++ return Successors[Condition == true ? 0 : 1]; ++ if (succ_size() == 1) ++ return Successors[0]; ++ return nullptr; + } + + const BinaryBasicBlock *getConditionalSuccessor(bool Condition) const { +@@ -410,6 +421,13 @@ public: + return const_cast(this)->getFallthrough(); + } + ++ /// Return branch info corresponding to only branch. 
++ const BinaryBranchInfo &getOnlyBranchInfo() const { ++ assert(BranchInfo.size() > 0 && ++ "could only be called for blocks with at least 1 successor"); ++ return BranchInfo[0]; ++ }; ++ + /// Return branch info corresponding to a taken branch. + const BinaryBranchInfo &getTakenBranchInfo() const { + assert(BranchInfo.size() == 2 && +@@ -818,6 +836,36 @@ public: + OutputAddressRange.second = Address; + } + ++ /// Sets features of this BB. ++ void setFeatures(BinaryBasicBlockFeature BBF) { ++ BlockFeatures = BBF; ++ } ++ ++ /// Gets numeric features of this BB. ++ BinaryBasicBlockFeature getFeatures() { ++ return BlockFeatures; ++ } ++ ++ /// Gets children sets of this BB. ++ std::set getChildrenSet() { ++ return ChildrenSet; ++ } ++ ++ /// Gets parent sets of this BB. ++ std::set getParentSet() { ++ return ParentSet; ++ } ++ ++ /// Inserts children sets of this BB. ++ void insertChildrenSet(BinaryBasicBlock *Node) { ++ ChildrenSet.insert(Node); ++ } ++ ++ /// Inserts parent sets of this BB. ++ void insertParentSet(BinaryBasicBlock *Node) { ++ ParentSet.insert(Node); ++ } ++ + /// Gets the memory address range of this BB in the input binary. 
+ std::pair getInputAddressRange() const { + return InputRange; +@@ -991,7 +1039,8 @@ private: + #if defined(LLVM_ON_UNIX) + /// Keep the size of the BinaryBasicBlock within a reasonable size class + /// (jemalloc bucket) on Linux +-static_assert(sizeof(BinaryBasicBlock) <= 256); ++/// The size threshod is expanded from 256 to 2048 to contain the extra BB features ++static_assert(sizeof(BinaryBasicBlock) <= 2048, ""); + #endif + + bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS); +diff --git a/bolt/include/bolt/Core/BinaryBasicBlockFeature.h b/bolt/include/bolt/Core/BinaryBasicBlockFeature.h +new file mode 100644 +index 000000000..2b4809b1a +--- /dev/null ++++ b/bolt/include/bolt/Core/BinaryBasicBlockFeature.h +@@ -0,0 +1,268 @@ ++//===- bolt/Core/BinaryBasicBlockFeature.h - Low-level basic block -----*- C++ ++//-*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// Features of BinaryBasicBlock ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef BOLT_CORE_BINARY_BASIC_BLOCK_FEATURE_H ++#define BOLT_CORE_BINARY_BASIC_BLOCK_FEATURE_H ++ ++#include "bolt/Core/FunctionLayout.h" ++#include "bolt/Core/MCPlus.h" ++#include "llvm/ADT/GraphTraits.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCSymbol.h" ++#include "llvm/Support/ErrorOr.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include ++ ++namespace llvm { ++ ++namespace bolt { ++ ++class BinaryBasicBlockFeature { ++ ++public: ++ int32_t Opcode; ++ ++ int16_t Direction; ++ ++ int32_t CmpOpcode; ++ ++ int16_t LoopHeader; ++ ++ int16_t ProcedureType; ++ ++ int64_t Count; ++ ++ int64_t FallthroughCount; ++ ++ int64_t TotalLoops; ++ ++ int64_t LoopDepth; ++ ++ int64_t LoopNumBlocks; ++ ++ int64_t LocalExitingBlock; ++ ++ int64_t LocalLatchBlock; ++ ++ int64_t LocalLoopHeader; ++ ++ int64_t Call; ++ ++ int64_t DeltaTaken; ++ ++ int64_t NumLoads; ++ ++ int64_t NumCalls; ++ ++ int64_t OperandRAType; ++ ++ int64_t OperandRBType; ++ ++ int64_t BasicBlockSize; ++ ++ int64_t NumBasicBlocks; ++ ++ int64_t HasIndirectCalls; ++ ++ std::vector EndOpcode_vec; ++ ++ std::vector LoopHeader_vec; ++ ++ std::vector Backedge_vec; ++ ++ std::vector Exit_vec; ++ ++ std::vector Call_vec; ++ ++ std::vector BasicBlockSize_vec; ++ ++ std::vector InferenceFeatures; ++ ++ uint64_t FuncExec; ++ ++ int32_t ParentChildNum; ++ ++ int32_t ParentCount; ++ ++ int32_t ChildParentNum; ++ ++ int32_t ChildCount; ++ ++public: ++ void setOpcode(const int32_t &BlockOpcode) { Opcode = BlockOpcode; } ++ ++ void setDirection(const int16_t &BlockDirection) { ++ Direction = BlockDirection; ++ } ++ ++ void setCmpOpcode(const int32_t &BlockCmpOpcode) { ++ CmpOpcode = 
BlockCmpOpcode; ++ } ++ ++ void setLoopHeader(const int16_t &BlockLoopHeader) { ++ LoopHeader = BlockLoopHeader; ++ } ++ ++ void setProcedureType(const int16_t &BlockProcedureType) { ++ ProcedureType = BlockProcedureType; ++ } ++ ++ void setCount(const int64_t &BlockCount) { Count = BlockCount; } ++ ++ void setFallthroughCount(const int64_t &BlockFallthroughCount) { ++ FallthroughCount = BlockFallthroughCount; ++ } ++ ++ void setTotalLoops(const int64_t &BlockTotalLoops) { ++ TotalLoops = BlockTotalLoops; ++ } ++ ++ void setLoopDepth(const int64_t &BlockLoopDepth) { ++ LoopDepth = BlockLoopDepth; ++ } ++ ++ void setLoopNumBlocks(const int64_t &BlockLoopNumBlocks) { ++ LoopNumBlocks = BlockLoopNumBlocks; ++ } ++ ++ void setLocalExitingBlock(const int64_t &BlockLocalExitingBlock) { ++ LocalExitingBlock = BlockLocalExitingBlock; ++ } ++ ++ void setLocalLatchBlock(const int64_t &BlockLocalLatchBlock) { ++ LocalLatchBlock = BlockLocalLatchBlock; ++ } ++ ++ void setLocalLoopHeader(const int64_t &BlockLocalLoopHeader) { ++ LocalLoopHeader = BlockLocalLoopHeader; ++ } ++ ++ void setDeltaTaken(const int64_t &BlockDeltaTaken) { ++ DeltaTaken = BlockDeltaTaken; ++ } ++ ++ void setNumLoads(const int64_t &BlockNumLoads) { NumLoads = BlockNumLoads; } ++ ++ void setNumCalls(const int64_t &BlockNumCalls) { NumCalls = BlockNumCalls; } ++ ++ void setOperandRAType(const int64_t &BlockOperandRAType) { ++ OperandRAType = BlockOperandRAType; ++ } ++ ++ void setOperandRBType(const int64_t &BlockOperandRBType) { ++ OperandRBType = BlockOperandRBType; ++ } ++ ++ void setBasicBlockSize(const int64_t &BlockBasicBlockSize) { ++ BasicBlockSize = BlockBasicBlockSize; ++ } ++ ++ void setNumBasicBlocks(const int64_t &BlockNumBasicBlocks) { ++ NumBasicBlocks = BlockNumBasicBlocks; ++ } ++ ++ void setHasIndirectCalls(const int64_t &BlockHasIndirectCalls) { ++ HasIndirectCalls = BlockHasIndirectCalls; ++ } ++ ++ void setEndOpcodeVec(const int32_t &EndOpcode) { ++ EndOpcode_vec.push_back(EndOpcode); 
++ } ++ ++ void setLoopHeaderVec(const int16_t &LoopHeader) { ++ LoopHeader_vec.push_back(LoopHeader); ++ } ++ ++ void setBackedgeVec(const int16_t &Backedge) { ++ Backedge_vec.push_back(Backedge); ++ } ++ ++ void setExitVec(const int16_t &Exit) { Exit_vec.push_back(Exit); } ++ ++ void setCallVec(const int16_t &Call) { Call_vec.push_back(Call); } ++ ++ void setBasicBlockSizeVec(const int64_t &BasicBlockSize) { ++ BasicBlockSize_vec.push_back(BasicBlockSize); ++ } ++ ++ void setFunExec(const uint64_t &BlockFuncExec) { FuncExec = BlockFuncExec; } ++ ++ void setParentChildNum(const int32_t &BlockParentChildNum) { ++ ParentChildNum = BlockParentChildNum; ++ } ++ ++ void setParentCount(const int32_t &BlockParentCount) { ++ ParentCount = BlockParentCount; ++ } ++ ++ void setChildParentNum(const int32_t &BlockChildParentNum) { ++ ChildParentNum = BlockChildParentNum; ++ } ++ ++ void setChildCount(const int32_t &BlockChildCount) { ++ ChildCount = BlockChildCount; ++ } ++ ++ void setInferenceFeatures() { ++ ++ if (Count == -1 || FallthroughCount == -1) { ++ return; ++ } ++ if (ParentChildNum == -1 && ParentCount == -1 && ChildParentNum == -1 && ++ ChildCount == -1) { ++ return; ++ } ++ ++ InferenceFeatures.push_back(static_cast(Direction)); ++ InferenceFeatures.push_back(static_cast(LoopHeader)); ++ InferenceFeatures.push_back(static_cast(ProcedureType)); ++ InferenceFeatures.push_back(static_cast(OperandRAType)); ++ InferenceFeatures.push_back(static_cast(OperandRBType)); ++ InferenceFeatures.push_back(static_cast(LoopHeader_vec[0])); ++ InferenceFeatures.push_back(static_cast(Backedge_vec[0])); ++ InferenceFeatures.push_back(static_cast(Exit_vec[0])); ++ InferenceFeatures.push_back(static_cast(LoopHeader_vec[1])); ++ InferenceFeatures.push_back(static_cast(Call_vec[0])); ++ InferenceFeatures.push_back(static_cast(LocalExitingBlock)); ++ InferenceFeatures.push_back(static_cast(HasIndirectCalls)); ++ InferenceFeatures.push_back(static_cast(LocalLatchBlock)); ++ 
InferenceFeatures.push_back(static_cast(LocalLoopHeader)); ++ InferenceFeatures.push_back(static_cast(Opcode)); ++ InferenceFeatures.push_back(static_cast(CmpOpcode)); ++ InferenceFeatures.push_back(static_cast(EndOpcode_vec[0])); ++ InferenceFeatures.push_back(static_cast(EndOpcode_vec[1])); ++ InferenceFeatures.push_back(static_cast(FuncExec)); ++ InferenceFeatures.push_back(static_cast(NumBasicBlocks)); ++ InferenceFeatures.push_back(static_cast(BasicBlockSize)); ++ InferenceFeatures.push_back(static_cast(BasicBlockSize_vec[0])); ++ InferenceFeatures.push_back(static_cast(BasicBlockSize_vec[1])); ++ InferenceFeatures.push_back(static_cast(LoopNumBlocks)); ++ InferenceFeatures.push_back(static_cast(NumLoads)); ++ InferenceFeatures.push_back(static_cast(NumCalls)); ++ InferenceFeatures.push_back(static_cast(TotalLoops)); ++ InferenceFeatures.push_back(static_cast(DeltaTaken)); ++ InferenceFeatures.push_back(static_cast(LoopDepth)); ++ InferenceFeatures.push_back(static_cast(ParentChildNum)); ++ InferenceFeatures.push_back(static_cast(ParentCount)); ++ InferenceFeatures.push_back(static_cast(ChildParentNum)); ++ InferenceFeatures.push_back(static_cast(ChildCount)); ++ } ++ ++ std::vector getInferenceFeatures() { return InferenceFeatures; } ++}; ++} // namespace bolt ++} // namespace llvm ++ ++#endif +\ No newline at end of file +diff --git a/bolt/include/bolt/Passes/FeatureMiner.h b/bolt/include/bolt/Passes/FeatureMiner.h +new file mode 100644 +index 000000000..6170aa62d +--- /dev/null ++++ b/bolt/include/bolt/Passes/FeatureMiner.h +@@ -0,0 +1,176 @@ ++//===--- Passes/FeatureMiner.h ++//---------------------------------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++//===----------------------------------------------------------------------===// ++// A very simple feature extractor based on Calder's paper ++// Evidence-based static branch prediction using machine learning ++// https://dl.acm.org/doi/10.1145/239912.239923 ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ ++#define LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ ++ ++#include "bolt/Core/BinaryData.h" ++#include "bolt/Core/BinaryFunction.h" ++#include "bolt/Core/BinaryLoop.h" ++#include "bolt/Passes/BinaryPasses.h" ++#include "bolt/Passes/DominatorAnalysis.h" ++#include "bolt/Passes/StaticBranchInfo.h" ++#include "llvm/ADT/DenseMap.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include ++#include ++#include ++#include ++ ++namespace llvm { ++namespace bolt { ++ ++class FeatureMiner : public BinaryFunctionPass { ++private: ++ std::unique_ptr SBI; ++ /// BasicBlockInfo - This structure holds feature information about the target ++ /// BasicBlock of either the taken or the fallthrough paths of a given branch. 
++ struct BasicBlockInfo { ++ std::optional BranchDominates; // 1 - dominates, 0 - does not dominate ++ std::optional ++ BranchPostdominates; // 1 - postdominates, 0 - does not PD ++ std::optional LoopHeader; // 1 - loop header, 0 - not a loop header ++ std::optional Backedge; // 1 - loop back, 0 - not a loop back ++ std::optional Exit; // 1 - loop exit, 0 - not a loop exit ++ std::optional Call; // 1 - program call, 0 - not a program call ++ std::optional NumCalls; ++ std::optional NumLoads; ++ std::optional NumStores; ++ std::optional EndOpcode; // 0 = NOTHING ++ std::string EndOpcodeStr = "UNDEF"; ++ std::optional BasicBlockSize; ++ std::string FromFunName = "UNDEF"; ++ uint32_t FromBb; ++ std::string ToFunName = "UNDEF"; ++ uint32_t ToBb; ++ ++ std::optional NumCallsExit; ++ std::optional NumCallsInvoke; ++ std::optional NumIndirectCalls; ++ std::optional NumTailCalls; ++ }; ++ ++ typedef std::unique_ptr BBIPtr; ++ ++ /// BranchFeaturesInfo - This structure holds feature information about each ++ /// two-way branch from the program. 
++ struct BranchFeaturesInfo { ++ std::string OpcodeStr = "UNDEF"; ++ std::string CmpOpcodeStr = "UNDEF"; ++ bool Simple = 0; ++ ++ std::optional Opcode; ++ std::optional CmpOpcode; ++ std::optional Count; ++ std::optional MissPredicted; ++ std::optional FallthroughCount; ++ std::optional FallthroughMissPredicted; ++ BBIPtr TrueSuccessor = std::make_unique(); ++ BBIPtr FalseSuccessor = std::make_unique(); ++ std::optional ProcedureType; // 1 - Leaf, 0 - NonLeaf, 2 - CallSelf ++ std::optional LoopHeader; // 1 — loop header, 0 - not a loop header ++ std::optional Direction; // 1 - Forward Branch, 0 - Backward Branch ++ ++ std::optional NumOuterLoops; ++ std::optional TotalLoops; ++ std::optional MaximumLoopDepth; ++ std::optional LoopDepth; ++ std::optional LoopNumExitEdges; ++ std::optional LoopNumExitBlocks; ++ std::optional LoopNumExitingBlocks; ++ std::optional LoopNumLatches; ++ std::optional LoopNumBlocks; ++ std::optional LoopNumBackEdges; ++ std::optional NumLoads; ++ std::optional NumStores; ++ ++ std::optional LocalExitingBlock; ++ std::optional LocalLatchBlock; ++ std::optional LocalLoopHeader; ++ std::optional Call; ++ ++ std::optional NumCalls; ++ std::optional NumCallsExit; ++ std::optional NumCallsInvoke; ++ std::optional NumIndirectCalls; ++ std::optional NumTailCalls; ++ std::optional NumSelfCalls; ++ ++ std::optional NumBasicBlocks; ++ ++ std::optional DeltaTaken; ++ ++ std::optional OperandRAType; ++ std::optional OperandRBType; ++ ++ std::optional BasicBlockSize; ++ ++ std::optional BranchOffset; ++ }; ++ ++ typedef std::unique_ptr BFIPtr; ++ ++ std::vector BranchesInfoSet; ++ ++ /// getProcedureType - Determines which category the function falls into: ++ /// Leaf, Non-leaf or Calls-self. ++ int8_t getProcedureType(BinaryFunction &Function, BinaryContext &BC); ++ ++ /// addSuccessorInfo - Discovers feature information for the target successor ++ /// basic block, and inserts it into the static branch info container. 
++ void addSuccessorInfo(BFIPtr const &BFI, BinaryFunction &Function, ++ BinaryContext &BC, BinaryBasicBlock &BB, bool SuccType); ++ ++ /// extractFeatures - Extracts the feature information for each two-way branch ++ /// from the program. ++ void extractFeatures(BinaryFunction &Function, BinaryContext &BC); ++ ++ void generateInstFeatures(BinaryContext &BC, BinaryBasicBlock &BB, ++ BFIPtr const &BFI, int Index); ++ /// dumpSuccessorFeatures - Dumps the feature information about the target ++ /// BasicBlock of either the taken or the fallthrough paths of a given branch. ++ void generateSuccessorFeatures(BBIPtr &Successor, ++ BinaryBasicBlockFeature *BBF); ++ ++ /// dumpFeatures - Dumps the feature information about each two-way branch ++ /// from the program. ++ void dumpFeatures(raw_ostream &Printer, uint64_t FunctionAddress, ++ uint64_t FunctionFrequency); ++ ++ /// dumpProfileData - Dumps a limited version of the inout profile data ++ /// that contains only profile for conditional branches, unconditional ++ /// branches and terminators that aren't branches. 
++ void dumpProfileData(BinaryFunction &Function, raw_ostream &Printer); ++ ++public: ++ explicit FeatureMiner(const cl::opt &PrintPass) ++ : BinaryFunctionPass(PrintPass) {} ++ ++ std::ofstream trainPrinter; ++ ++ const char *getName() const override { return "feature-miner"; } ++ ++ void runOnFunctions(BinaryContext &BC) override; ++ void inferenceFeatures(BinaryFunction &Function); ++ void generateProfileFeatures(BinaryBasicBlock *BB, ++ BinaryBasicBlockFeature *BBF); ++}; ++ ++} // namespace bolt ++} // namespace llvm ++ ++#endif /* LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ */ +diff --git a/bolt/include/bolt/Passes/StaticBranchInfo.h b/bolt/include/bolt/Passes/StaticBranchInfo.h +new file mode 100644 +index 000000000..8de8df793 +--- /dev/null ++++ b/bolt/include/bolt/Passes/StaticBranchInfo.h +@@ -0,0 +1,108 @@ ++//===------ Passes/StaticBranchInfo.h -------------------------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This is an auxiliary class to the feature miner, static branch probability ++// and frequency passes. This class is responsible for finding loop info (loop ++// back edges, loop exit edges and loop headers) of a function. It also finds ++// basic block info (if a block contains store and call instructions) and if a ++// basic block contains a call to the exit. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ ++#define LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ ++ ++#include "bolt/Core/BinaryContext.h" ++#include "bolt/Core/BinaryFunction.h" ++#include "bolt/Core/BinaryLoop.h" ++#include "llvm/MC/MCSymbol.h" ++#include ++ ++namespace llvm { ++namespace bolt { ++ ++class StaticBranchInfo { ++ ++public: ++ /// An edge indicates that a control flow may go from a basic block (source) ++ /// to an other one (destination), and this pair of basic blocks will be used ++ /// to index maps and retrieve content of sets. ++ typedef std::pair Edge; ++ ++private: ++ /// Holds the loop headers of a given function. ++ DenseSet LoopHeaders; ++ ++ /// Holds the loop backedges of a given function. ++ DenseSet BackEdges; ++ ++ /// Holds the loop exit edges of a given function. ++ DenseSet ExitEdges; ++ ++ /// Holds the basic blocks of a given function ++ /// that contains at least one call instructions. ++ DenseSet CallSet; ++ ++ /// Holds the basic blocks of a given function ++ /// that contains at least one store instructions. ++ DenseSet StoreSet; ++ ++ unsigned NumLoads; ++ unsigned NumStores; ++ ++public: ++ unsigned getNumLoads() { return NumLoads; } ++ ++ unsigned getNumStores() { return NumStores; } ++ ++ /// findLoopEdgesInfo - Finds all loop back edges, loop exit eges ++ /// and loop headers within the function. ++ void findLoopEdgesInfo(const BinaryLoopInfo &LoopsInfo); ++ ++ /// findBasicBlockInfo - Finds all call and store instructions within ++ /// the basic blocks of a given function. ++ void findBasicBlockInfo(const BinaryFunction &Function, BinaryContext &BC); ++ ++ /// isBackEdge - Checks if the edge is a loop back edge. ++ bool isBackEdge(const Edge &CFGEdge) const; ++ ++ /// isBackEdge - Checks if the edge is a loop back edge. 
++ bool isBackEdge(const BinaryBasicBlock *SrcBB, ++ const BinaryBasicBlock *DstBB) const; ++ ++ /// isExitEdge - Checks if the edge is a loop exit edge. ++ bool isExitEdge(const BinaryLoop::Edge &CFGEdge) const; ++ ++ /// isExitEdge - Checks if the edge is a loop exit edge. ++ bool isExitEdge(const BinaryBasicBlock *SrcBB, ++ const BinaryBasicBlock *DstBB) const; ++ ++ /// isLoopHeader - Checks if the basic block is a loop header. ++ bool isLoopHeader(const BinaryBasicBlock *BB) const; ++ ++ /// hasCallInst - Checks if the basic block has a call instruction. ++ bool hasCallInst(const BinaryBasicBlock *BB) const; ++ ++ /// hasStoreInst - Checks if the basic block has a store instruction. ++ bool hasStoreInst(const BinaryBasicBlock *BB) const; ++ ++ /// countBackEdges - Compute the number of BB's successor that are back edges. ++ unsigned countBackEdges(BinaryBasicBlock *BB) const; ++ ++ /// countExitEdges - Compute the number of BB's successor that are exit edges. ++ unsigned countExitEdges(BinaryBasicBlock *BB) const; ++ ++ /// clear - Cleans up all the content from the data structs used. 
++ void clear(); ++}; ++ ++} // namespace bolt ++} // namespace llvm ++ ++#endif /* LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ */ +diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h +index 916b4f7e2..bf732d47c 100644 +--- a/bolt/include/bolt/Profile/DataReader.h ++++ b/bolt/include/bolt/Profile/DataReader.h +@@ -22,6 +22,7 @@ + #include "llvm/Support/MemoryBuffer.h" + #include "llvm/Support/raw_ostream.h" + #include ++#include + #include + #include + +@@ -44,6 +45,15 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LBREntry &LBR) { + return OS; + } + ++extern "C" { ++typedef void *(*CreateONNXRunnerFunc)(const char *); ++typedef void (*DeleteONNXRunnerFunc)(void *); ++typedef std::vector (*RunONNXModelFunc)(void *, ++ const std::vector &, ++ const std::vector &, ++ const std::vector &, int); ++} ++ + struct Location { + bool IsSymbol; + StringRef Name; +@@ -263,7 +273,8 @@ struct FuncSampleData { + class DataReader : public ProfileReaderBase { + public: + explicit DataReader(StringRef Filename) +- : ProfileReaderBase(Filename), Diag(errs()) {} ++ : ProfileReaderBase(Filename), Diag(errs()), onnxRunner(nullptr), ++ libHandle(nullptr), handleOnnxRuntime(nullptr) {} + + StringRef getReaderName() const override { return "branch profile reader"; } + +@@ -282,7 +293,87 @@ public: + /// Return all event names used to collect this profile + StringSet<> getEventNames() const override { return EventNames; } + ++ ~DataReader() { ++ // delete onnxrunner; ++ if (onnxRunner && libHandle && handleOnnxRuntime) { ++ DeleteONNXRunnerFunc deleteONNXRunner = ++ (DeleteONNXRunnerFunc)dlsym(libHandle, "deleteONNXRunner"); ++ deleteONNXRunner(onnxRunner); ++ dlclose(libHandle); ++ dlclose(handleOnnxRuntime); ++ } ++ } ++ ++ /// Initialize the onnxruntime model. 
++ void initializeONNXRunner(const std::string &modelPath) { ++ if (!onnxRunner && !libHandle && !handleOnnxRuntime) { ++ handleOnnxRuntime = ++ dlopen("libonnxruntime.so", RTLD_LAZY | RTLD_GLOBAL); ++ if (handleOnnxRuntime == nullptr) { ++ outs() << "error: llvm-bolt failed during loading onnxruntime.so.\n"; ++ exit(1); ++ } ++ libHandle = dlopen("libONNXRunner.so", RTLD_LAZY); ++ if (libHandle == nullptr) { ++ outs() << "error: llvm-bolt failed during loading libONNXRunner.so.\n"; ++ exit(1); ++ } ++ CreateONNXRunnerFunc createONNXRunner = ++ (CreateONNXRunnerFunc)dlsym(libHandle, "createONNXRunner"); ++ onnxRunner = createONNXRunner(modelPath.c_str()); ++ } ++ } ++ ++ /// Inference step for predicting the BB counts based on the BB features. ++ float ONNXInference(const std::vector &input_string, ++ const std::vector &input_int64, ++ const std::vector &input_float, int batch_size = 1) { ++ if (onnxRunner && libHandle) { ++ RunONNXModelFunc runONNXModel = ++ (RunONNXModelFunc)dlsym(libHandle, "runONNXModel"); ++ std::vector model_preds = runONNXModel( ++ onnxRunner, input_string, input_int64, input_float, batch_size); ++ if (model_preds.size() <= 0) { ++ outs() << "error: llvm-bolt model prediction result cannot be empty.\n"; ++ exit(1); ++ } ++ float pred = model_preds[0]; ++ return pred; ++ } ++ return -1.0; ++ } ++ ++ /// Return the annotating threshold for the model prediction. ++ void setThreshold(float annotate_threshold) { ++ threshold = annotate_threshold; ++ } ++ + protected: ++ /// The onnxruntime model pointer read from the input model path. ++ void *onnxRunner; ++ ++ /// The library handle of the ai4compiler framwork. ++ void *libHandle; ++ ++ /// The library handle of the onnxruntime. ++ void *handleOnnxRuntime; ++ ++ /// The annotating threshold for the model prediction. ++ float threshold; ++ ++ /// Return the annotating threshold for the model prediction. 
++ float getThreshold() const { return threshold; } ++ ++ /// The counting value of the total modified BB-count number. ++ uint64_t modified_BB_total = 0; ++ ++ /// Add the total modified BB-count number by the BB modifiied number within ++ /// the funciton. ++ void addModifiedBBTotal(uint64_t &value) { modified_BB_total += value; } ++ ++ /// Return the counting value of the total modified BB-count number. ++ uint64_t getModifiedBBTotal() const { return modified_BB_total; } ++ + /// Read profile information available for the function. + void readProfile(BinaryFunction &BF); + +diff --git a/bolt/lib/Core/BinaryBasicBlockFeature.cpp b/bolt/lib/Core/BinaryBasicBlockFeature.cpp +new file mode 100644 +index 000000000..e1a2a3dd8 +--- /dev/null ++++ b/bolt/lib/Core/BinaryBasicBlockFeature.cpp +@@ -0,0 +1,21 @@ ++//===- bolt/Core/BinaryBasicBlockFeature.cpp - Low-level basic block ++//-------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements the BinaryBasicBlock class. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "bolt/Core/BinaryBasicBlock.h" ++#include "bolt/Core/BinaryBasicBlockFeature.h" ++ ++#define DEBUG_TYPE "bolt" ++ ++namespace llvm { ++namespace bolt {} // namespace bolt ++} // namespace llvm +\ No newline at end of file +diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt +index a4612fb93..f93147d39 100644 +--- a/bolt/lib/Core/CMakeLists.txt ++++ b/bolt/lib/Core/CMakeLists.txt +@@ -12,6 +12,7 @@ set(LLVM_LINK_COMPONENTS + + add_llvm_library(LLVMBOLTCore + BinaryBasicBlock.cpp ++ BinaryBasicBlockFeature.cpp + BinaryContext.cpp + BinaryData.cpp + BinaryEmitter.cpp +diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt +index b8bbe59a6..e9ccea17c 100644 +--- a/bolt/lib/Passes/CMakeLists.txt ++++ b/bolt/lib/Passes/CMakeLists.txt +@@ -13,6 +13,7 @@ add_llvm_library(LLVMBOLTPasses + DataflowInfoManager.cpp + FrameAnalysis.cpp + FrameOptimizer.cpp ++ FeatureMiner.cpp + FixRelaxationPass.cpp + FixRISCVCallsPass.cpp + HFSort.cpp +@@ -41,6 +42,7 @@ add_llvm_library(LLVMBOLTPasses + StackAvailableExpressions.cpp + StackPointerTracking.cpp + StackReachingUses.cpp ++ StaticBranchInfo.cpp + StokeInfo.cpp + TailDuplication.cpp + ThreeWayBranch.cpp +diff --git a/bolt/lib/Passes/FeatureMiner.cpp b/bolt/lib/Passes/FeatureMiner.cpp +new file mode 100644 +index 000000000..d93aef648 +--- /dev/null ++++ b/bolt/lib/Passes/FeatureMiner.cpp +@@ -0,0 +1,572 @@ ++//===--- Passes/FeatureMiner.cpp ------------------------------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++//===----------------------------------------------------------------------===// ++// A very simple feature extractor based on Calder's paper ++// Evidence-based static branch prediction using machine learning ++// https://dl.acm.org/doi/10.1145/239912.239923 ++//===----------------------------------------------------------------------===// ++ ++#include "bolt/Passes/DataflowInfoManager.h" ++#include "bolt/Passes/FeatureMiner.h" ++#include "bolt/Passes/StaticBranchInfo.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/FileSystem.h" ++ ++#undef DEBUG_TYPE ++#define DEBUG_TYPE "bolt-feature-miner" ++ ++using namespace llvm; ++using namespace bolt; ++ ++namespace opts { ++extern cl::opt BlockCorrection; ++ ++} // namespace opts ++ ++namespace llvm { ++namespace bolt { ++ ++class BinaryFunction; ++ ++int8_t FeatureMiner::getProcedureType(BinaryFunction &Function, ++ BinaryContext &BC) { ++ int8_t ProcedureType = 1; ++ for (auto &BB : Function) { ++ for (auto &Inst : BB) { ++ if (BC.MIB->isCall(Inst)) { ++ ProcedureType = 0; // non-leaf type ++ if (const auto *CalleeSymbol = BC.MIB->getTargetSymbol(Inst)) { ++ const auto *Callee = BC.getFunctionForSymbol(CalleeSymbol); ++ if (Callee && ++ Callee->getFunctionNumber() == Function.getFunctionNumber()) { ++ return 2; // call self type ++ } ++ } ++ } ++ } ++ } ++ return ProcedureType; // leaf type ++} ++ ++void FeatureMiner::addSuccessorInfo(BFIPtr const &BFI, BinaryFunction &Function, ++ BinaryContext &BC, BinaryBasicBlock &BB, ++ bool SuccType) { ++ ++ BinaryBasicBlock *Successor = BB.getConditionalSuccessor(SuccType); ++ ++ if (!Successor) ++ return; ++ ++ unsigned NumCalls{0}; ++ ++ for (auto &Inst : BB) { ++ if (BC.MIB->isCall(Inst)) { ++ ++NumCalls; ++ } ++ } ++ ++ BBIPtr SuccBBInfo = std::make_unique(); ++ ++ // Check if the successor basic block is a loop header and store it. 
++ SuccBBInfo->LoopHeader = SBI->isLoopHeader(Successor); ++ ++ SuccBBInfo->BasicBlockSize = Successor->size(); ++ ++ // Check if the edge getting to the successor basic block is a loop ++ // exit edge and store it. ++ SuccBBInfo->Exit = SBI->isExitEdge(&BB, Successor); ++ ++ // Check if the edge getting to the successor basic block is a loop ++ // back edge and store it. ++ SuccBBInfo->Backedge = SBI->isBackEdge(&BB, Successor); ++ ++ MCInst *SuccInst = Successor->getTerminatorBefore(nullptr); ++ ++ // Store information about the branch type ending sucessor basic block ++ SuccBBInfo->EndOpcode = (SuccInst && BC.MIA->isBranch(*SuccInst)) ++ ? SuccInst->getOpcode() ++ : 0; // 0 = NOTHING ++ ++ // Check if the successor basic block contains ++ // a procedure call and store it. ++ SuccBBInfo->Call = (NumCalls > 0) ? 1 // Contains a call instruction ++ : 0; // Does not contain a call instruction ++ ++ uint32_t Offset = BB.getEndOffset(); ++ ++ if (SuccType) { ++ BFI->TrueSuccessor = std::move(SuccBBInfo); ++ // Check if the taken branch is a forward ++ // or a backwards branch and store it ++ BFI->Direction = (Function.isForwardBranch(&BB, Successor) == true) ++ ? 
1 // Forward branch ++ : 0; // Backwards branch ++ ++ auto OnlyBranchInfo = BB.getOnlyBranchInfo(); ++ BFI->Count = OnlyBranchInfo.Count; ++ ++ if (Offset) { ++ uint32_t TargetOffset = Successor->getInputOffset(); ++ uint32_t BranchOffset = Offset; ++ if (BranchOffset != UINT32_MAX && TargetOffset != UINT32_MAX) { ++ int64_t Delta = static_cast(TargetOffset) - ++ static_cast(BranchOffset); ++ BFI->DeltaTaken = std::abs(Delta); ++ } ++ } ++ } else { ++ if (BB.succ_size() == 2) { ++ auto FallthroughBranchInfo = BB.getFallthroughBranchInfo(); ++ BFI->FallthroughCount = FallthroughBranchInfo.Count; ++ } else { ++ auto OnlyBranchInfo = BB.getOnlyBranchInfo(); ++ BFI->FallthroughCount = OnlyBranchInfo.Count; ++ } ++ BFI->FalseSuccessor = std::move(SuccBBInfo); ++ } ++} ++ ++void FeatureMiner::extractFeatures(BinaryFunction &Function, ++ BinaryContext &BC) { ++ int8_t ProcedureType = getProcedureType(Function, BC); ++ auto Info = DataflowInfoManager(Function, nullptr, nullptr); ++ const BinaryLoopInfo &LoopsInfo = Function.getLoopInfo(); ++ ++ bool Simple = Function.isSimple(); ++ const auto &Order = Function.dfs(); ++ std::string Function_name = Function.getPrintName(); ++ ++ for (auto *BBA : Order) { ++ ++ auto &BB = *BBA; ++ ++ BinaryBasicBlockFeature BBF = BB.getFeatures(); ++ ++ unsigned TotalLoops{0}; ++ unsigned LoopDepth{0}; ++ unsigned LoopNumBlocks{0}; ++ ++ bool LocalExitingBlock{false}; ++ bool LocalLatchBlock{false}; ++ bool LocalLoopHeader{false}; ++ ++ generateProfileFeatures(&BB, &BBF); ++ ++ BinaryLoop *Loop = LoopsInfo.getLoopFor(&BB); ++ if (Loop) { ++ SmallVector ExitingBlocks; ++ Loop->getExitingBlocks(ExitingBlocks); ++ ++ SmallVector ExitBlocks; ++ Loop->getExitBlocks(ExitBlocks); ++ ++ SmallVector ExitEdges; ++ Loop->getExitEdges(ExitEdges); ++ ++ SmallVector Latches; ++ Loop->getLoopLatches(Latches); ++ ++ TotalLoops = LoopsInfo.TotalLoops; ++ LoopDepth = Loop->getLoopDepth(); ++ LoopNumBlocks = Loop->getNumBlocks(); ++ LocalExitingBlock = 
Loop->isLoopExiting(&BB); ++ LocalLatchBlock = Loop->isLoopLatch(&BB); ++ LocalLoopHeader = ((Loop->getHeader() == (&BB)) ? 1 : 0); ++ } ++ ++ unsigned NumLoads{0}; ++ unsigned NumCalls{0}; ++ unsigned NumIndirectCalls{0}; ++ ++ for (auto &Inst : BB) { ++ if (BC.MIB->isLoad(Inst)) { ++ ++NumLoads; ++ } else if (BC.MIB->isCall(Inst)) { ++ ++NumCalls; ++ if (BC.MIB->isIndirectCall(Inst)) ++ ++NumIndirectCalls; ++ } ++ } ++ ++ int Index = -2; ++ bool LoopHeader = SBI->isLoopHeader(&BB); ++ ++ BFIPtr BFI = std::make_unique(); ++ ++ BFI->TotalLoops = TotalLoops; ++ BFI->LoopDepth = LoopDepth; ++ BFI->LoopNumBlocks = LoopNumBlocks; ++ BFI->LocalExitingBlock = LocalExitingBlock; ++ BFI->LocalLatchBlock = LocalLatchBlock; ++ BFI->LocalLoopHeader = LocalLoopHeader; ++ BFI->NumCalls = NumCalls; ++ BFI->BasicBlockSize = BB.size(); ++ BFI->NumBasicBlocks = Function.size(); ++ ++ BFI->NumLoads = NumLoads; ++ BFI->NumIndirectCalls = NumIndirectCalls; ++ BFI->LoopHeader = LoopHeader; ++ BFI->ProcedureType = ProcedureType; ++ ++ // Adding taken successor info. ++ addSuccessorInfo(BFI, Function, BC, BB, true); ++ // Adding fall through successor info. 
++ addSuccessorInfo(BFI, Function, BC, BB, false); ++ ++ MCInst ConditionalInst; ++ bool hasConditionalBranch = false; ++ MCInst UnconditionalInst; ++ bool hasUnconditionalBranch = false; ++ ++ for (auto &Inst : BB) { ++ ++Index; ++ if (!BC.MIA->isConditionalBranch(Inst) && ++ !BC.MIA->isUnconditionalBranch(Inst)) ++ continue; ++ ++ generateInstFeatures(BC, BB, BFI, Index); ++ ++ if (BC.MIA->isConditionalBranch(Inst)) { ++ ConditionalInst = Inst; ++ hasConditionalBranch = true; ++ } ++ ++ if (BC.MIA->isUnconditionalBranch(Inst)) { ++ UnconditionalInst = Inst; ++ hasUnconditionalBranch = true; ++ } ++ } ++ ++ if (hasConditionalBranch) { ++ BFI->Opcode = ConditionalInst.getOpcode(); ++ ++ } else { ++ if (hasUnconditionalBranch) { ++ BFI->Opcode = UnconditionalInst.getOpcode(); ++ ++ } else { ++ auto Inst = BB.getLastNonPseudoInstr(); ++ BFI->Opcode = Inst->getOpcode(); ++ generateInstFeatures(BC, BB, BFI, Index); ++ } ++ } ++ ++ auto &FalseSuccessor = BFI->FalseSuccessor; ++ auto &TrueSuccessor = BFI->TrueSuccessor; ++ ++ int16_t ProcedureType = (BFI->ProcedureType.has_value()) ++ ? static_cast(*(BFI->ProcedureType)) ++ : -1; ++ ++ int64_t Count = ++ (BFI->Count.has_value()) ? static_cast(*(BFI->Count)) : -1; ++ ++ int64_t FallthroughCount = ++ (BFI->FallthroughCount.has_value()) ++ ? static_cast(*(BFI->FallthroughCount)) ++ : -1; ++ ++ int16_t LoopHeaderValid = (BFI->LoopHeader.has_value()) ++ ? static_cast(*(BFI->LoopHeader)) ++ : -1; ++ ++ int64_t TotalLoopsValid = (BFI->TotalLoops.has_value()) ++ ? static_cast(*(BFI->TotalLoops)) ++ : -1; ++ int64_t LoopDepthValid = (BFI->LoopDepth.has_value()) ++ ? static_cast(*(BFI->LoopDepth)) ++ : -1; ++ int64_t LoopNumBlocksValid = ++ (BFI->LoopNumBlocks.has_value()) ++ ? static_cast(*(BFI->LoopNumBlocks)) ++ : -1; ++ int64_t LocalExitingBlockValid = ++ (BFI->LocalExitingBlock.has_value()) ++ ? static_cast(*(BFI->LocalExitingBlock)) ++ : -1; ++ ++ int64_t LocalLatchBlockValid = ++ (BFI->LocalLatchBlock.has_value()) ++ ? 
static_cast(*(BFI->LocalLatchBlock)) ++ : -1; ++ ++ int64_t LocalLoopHeaderValid = ++ (BFI->LocalLoopHeader.has_value()) ++ ? static_cast(*(BFI->LocalLoopHeader)) ++ : -1; ++ ++ int32_t CmpOpcode = (BFI->CmpOpcode.has_value()) ++ ? static_cast(*(BFI->CmpOpcode)) ++ : -1; ++ ++ int64_t OperandRAType = (BFI->OperandRAType.has_value()) ++ ? static_cast(*(BFI->OperandRAType)) ++ : 10; ++ ++ int64_t OperandRBType = (BFI->OperandRBType.has_value()) ++ ? static_cast(*(BFI->OperandRBType)) ++ : 10; ++ int16_t Direction = (BFI->Direction.has_value()) ++ ? static_cast(*(BFI->Direction)) ++ : -1; ++ ++ int64_t DeltaTaken = (BFI->DeltaTaken.has_value()) ++ ? static_cast(*(BFI->DeltaTaken)) ++ : -1; ++ ++ int64_t NumLoadsValid = (BFI->NumLoads.has_value()) ++ ? static_cast(*(BFI->NumLoads)) ++ : -1; ++ ++ int64_t BasicBlockSize = (BFI->BasicBlockSize.has_value()) ++ ? static_cast(*(BFI->BasicBlockSize)) ++ : -1; ++ ++ int64_t NumBasicBlocks = (BFI->NumBasicBlocks.has_value()) ++ ? static_cast(*(BFI->NumBasicBlocks)) ++ : -1; ++ ++ int64_t NumCallsValid = (BFI->NumCalls.has_value()) ++ ? static_cast(*(BFI->NumCalls)) ++ : -1; ++ ++ int64_t NumIndirectCallsValid = ++ (BFI->NumIndirectCalls.has_value()) ++ ? static_cast(*(BFI->NumIndirectCalls)) ++ : -1; ++ ++ int64_t HasIndirectCalls = (NumIndirectCallsValid > 0) ? 1 : 0; ++ ++ int32_t Opcode = ++ (BFI->Opcode.has_value()) ? static_cast(*(BFI->Opcode)) : -1; ++ ++ uint64_t fun_exec = Function.getExecutionCount(); ++ fun_exec = (fun_exec != UINT64_MAX) ? 
fun_exec : 0; ++ ++ BBF.setDirection(Direction); ++ BBF.setDeltaTaken(DeltaTaken); ++ BBF.setOpcode(Opcode); ++ BBF.setCmpOpcode(CmpOpcode); ++ BBF.setOperandRAType(OperandRAType); ++ BBF.setOperandRBType(OperandRBType); ++ BBF.setFunExec(fun_exec); ++ BBF.setTotalLoops(TotalLoopsValid); ++ BBF.setLoopDepth(LoopDepthValid); ++ BBF.setLoopNumBlocks(LoopNumBlocksValid); ++ BBF.setLocalExitingBlock(LocalExitingBlockValid); ++ BBF.setLocalLatchBlock(LocalLatchBlockValid); ++ BBF.setLocalLoopHeader(LocalLoopHeaderValid); ++ BBF.setNumCalls(NumCallsValid); ++ BBF.setBasicBlockSize(BasicBlockSize); ++ BBF.setNumBasicBlocks(NumBasicBlocks); ++ BBF.setNumLoads(NumLoadsValid); ++ BBF.setHasIndirectCalls(HasIndirectCalls); ++ BBF.setLoopHeader(LoopHeaderValid); ++ BBF.setProcedureType(ProcedureType); ++ BBF.setCount(Count); ++ BBF.setFallthroughCount(FallthroughCount); ++ ++ generateSuccessorFeatures(TrueSuccessor, &BBF); ++ generateSuccessorFeatures(FalseSuccessor, &BBF); ++ ++ FalseSuccessor.reset(); ++ TrueSuccessor.reset(); ++ ++ BBF.setInferenceFeatures(); ++ BB.setFeatures(BBF); ++ ++ BFI.reset(); ++ } ++} ++ ++void FeatureMiner::generateInstFeatures(BinaryContext &BC, BinaryBasicBlock &BB, ++ BFIPtr const &BFI, int Index) { ++ ++ // Holds the branch opcode info. ++ ++ BFI->CmpOpcode = 0; ++ if (Index > -1) { ++ auto Cmp = BB.begin() + Index; ++ if (BC.MII->get((*Cmp).getOpcode()).isCompare()) { ++ // Holding the branch comparison opcode info. 
++ BFI->CmpOpcode = (*Cmp).getOpcode(); ++ auto getOperandType = [&](const MCOperand &Operand) -> int32_t { ++ if (Operand.isReg()) ++ return 0; ++ else if (Operand.isImm()) ++ return 1; ++ else if (Operand.isSFPImm()) ++ return 2; ++ else if (Operand.isExpr()) ++ return 3; ++ else ++ return -1; ++ }; ++ ++ const auto InstInfo = BC.MII->get((*Cmp).getOpcode()); ++ unsigned NumDefs = InstInfo.getNumDefs(); ++ int32_t NumPrimeOperands = MCPlus::getNumPrimeOperands(*Cmp) - NumDefs; ++ switch (NumPrimeOperands) { ++ case 6: { ++ int32_t RBType = getOperandType((*Cmp).getOperand(NumDefs)); ++ int32_t RAType = getOperandType((*Cmp).getOperand(NumDefs + 1)); ++ ++ if (RBType == 0 && RAType == 0) { ++ BFI->OperandRBType = RBType; ++ BFI->OperandRAType = RAType; ++ } else if (RBType == 0 && (RAType == 1 || RAType == 2)) { ++ RAType = getOperandType((*Cmp).getOperand(NumPrimeOperands - 1)); ++ ++ if (RAType != 1 && RAType != 2) { ++ RAType = -1; ++ } ++ ++ BFI->OperandRBType = RBType; ++ BFI->OperandRAType = RAType; ++ } else { ++ BFI->OperandRAType = -1; ++ BFI->OperandRBType = -1; ++ } ++ break; ++ } ++ case 2: ++ BFI->OperandRBType = getOperandType((*Cmp).getOperand(NumDefs)); ++ BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs + 1)); ++ break; ++ case 3: ++ BFI->OperandRBType = getOperandType((*Cmp).getOperand(NumDefs)); ++ BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs + 2)); ++ break; ++ case 1: ++ BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs)); ++ break; ++ default: ++ BFI->OperandRAType = -1; ++ BFI->OperandRBType = -1; ++ break; ++ } ++ ++ } else { ++ Index -= 1; ++ for (int Idx = Index; Idx > -1; Idx--) { ++ auto Cmp = BB.begin() + Idx; ++ if (BC.MII->get((*Cmp).getOpcode()).isCompare()) { ++ // Holding the branch comparison opcode info. 
++ BFI->CmpOpcode = (*Cmp).getOpcode(); ++ break; ++ } ++ } ++ } ++ } ++} ++ ++void FeatureMiner::generateSuccessorFeatures(BBIPtr &Successor, ++ BinaryBasicBlockFeature *BBF) { ++ ++ int16_t LoopHeader = (Successor->LoopHeader.has_value()) ++ ? static_cast(*(Successor->LoopHeader)) ++ : -1; ++ ++ int16_t Backedge = (Successor->Backedge.has_value()) ++ ? static_cast(*(Successor->Backedge)) ++ : -1; ++ ++ int16_t Exit = (Successor->Exit.has_value()) ++ ? static_cast(*(Successor->Exit)) ++ : -1; ++ ++ int16_t Call = (Successor->Call.has_value()) ++ ? static_cast(*(Successor->Call)) ++ : -1; ++ ++ int32_t EndOpcode = (Successor->EndOpcode.has_value()) ++ ? static_cast(*(Successor->EndOpcode)) ++ : -1; ++ ++ int64_t BasicBlockSize = ++ (Successor->BasicBlockSize.has_value()) ++ ? static_cast(*(Successor->BasicBlockSize)) ++ : -1; ++ ++ BBF->setEndOpcodeVec(EndOpcode); ++ BBF->setLoopHeaderVec(LoopHeader); ++ BBF->setBackedgeVec(Backedge); ++ BBF->setExitVec(Exit); ++ BBF->setCallVec(Call); ++ BBF->setBasicBlockSizeVec(BasicBlockSize); ++} ++ ++void FeatureMiner::runOnFunctions(BinaryContext &BC) {} ++ ++void FeatureMiner::inferenceFeatures(BinaryFunction &Function) { ++ ++ SBI = std::make_unique(); ++ ++ if (Function.empty()) ++ return; ++ ++ if (!Function.isLoopFree()) { ++ const BinaryLoopInfo &LoopsInfo = Function.getLoopInfo(); ++ SBI->findLoopEdgesInfo(LoopsInfo); ++ } ++ ++ BinaryContext &BC = Function.getBinaryContext(); ++ extractFeatures(Function, BC); ++ ++ SBI->clear(); ++} ++ ++void FeatureMiner::generateProfileFeatures(BinaryBasicBlock *BB, ++ BinaryBasicBlockFeature *BBF) { ++ int32_t parentChildNum, parentCount, childParentNum, childCount; ++ ++ if (BB->getParentSet().size() == 0) { ++ parentChildNum = -1; ++ parentCount = -1; ++ } else { ++ parentChildNum = std::numeric_limits::max(); ++ parentCount = 0; ++ for (BinaryBasicBlock *parent : BB->getParentSet()) { ++ if (parent->getChildrenSet().size() < parentChildNum) { ++ parentChildNum = 
parent->getChildrenSet().size(); ++ parentCount = parent->getExecutionCount(); ++ } else if (parent->getChildrenSet().size() == parentChildNum && ++ parent->getExecutionCount() > parentCount) { ++ parentCount = parent->getExecutionCount(); ++ } ++ } ++ } ++ ++ if (BB->getChildrenSet().size() == 0) { ++ childParentNum = -1; ++ childCount = -1; ++ } else { ++ childParentNum = std::numeric_limits::max(); ++ childCount = 0; ++ for (BinaryBasicBlock *child : BB->getChildrenSet()) { ++ if (child->getParentSet().size() < childParentNum) { ++ childParentNum = child->getParentSet().size(); ++ childCount = child->getExecutionCount(); ++ } else if (child->getParentSet().size() == childParentNum && ++ child->getExecutionCount() > childCount) { ++ childCount = child->getExecutionCount(); ++ } ++ } ++ } ++ ++ int64_t parentCountCatch = parentCount > 0 ? 1 : 0; ++ int64_t childCountCatch = childCount > 0 ? 1 : 0; ++ ++ BBF->setParentChildNum(parentChildNum); ++ BBF->setParentCount(parentCountCatch); ++ BBF->setChildParentNum(childParentNum); ++ BBF->setChildCount(childCountCatch); ++} ++ ++} // namespace bolt ++} // namespace llvm +\ No newline at end of file +diff --git a/bolt/lib/Passes/StaticBranchInfo.cpp b/bolt/lib/Passes/StaticBranchInfo.cpp +new file mode 100644 +index 000000000..585dbcae2 +--- /dev/null ++++ b/bolt/lib/Passes/StaticBranchInfo.cpp +@@ -0,0 +1,143 @@ ++//===------ Passes/StaticBranchInfo.cpp -----------------------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This is an auxiliary class to the feature miner, static branch probability ++// and frequency passes. This class is responsible for finding loop info (loop ++// back edges, loop exit edges and loop headers) of a function. 
It also finds ++// basic block info (if a block contains store and call instructions) and if a ++// basic block contains a call to the exit. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "bolt/Core/BinaryBasicBlock.h" ++#include "bolt/Core/BinaryLoop.h" ++#include "bolt/Passes/StaticBranchInfo.h" ++ ++namespace llvm { ++namespace bolt { ++ ++void StaticBranchInfo::findLoopEdgesInfo(const BinaryLoopInfo &LoopsInfo) { ++ // Traverse discovered loops ++ std::stack Loops; ++ for (BinaryLoop *BL : LoopsInfo) ++ Loops.push(BL); ++ ++ while (!Loops.empty()) { ++ BinaryLoop *Loop = Loops.top(); ++ Loops.pop(); ++ BinaryBasicBlock *LoopHeader = Loop->getHeader(); ++ LoopHeaders.insert(LoopHeader); ++ ++ // Add nested loops in the stack. ++ for (BinaryLoop::iterator I = Loop->begin(), E = Loop->end(); I != E; ++I) { ++ Loops.push(*I); ++ } ++ ++ SmallVector Latches; ++ Loop->getLoopLatches(Latches); ++ ++ // Find back edges. ++ for (BinaryBasicBlock *Latch : Latches) { ++ for (BinaryBasicBlock *Succ : Latch->successors()) { ++ if (Succ == LoopHeader) { ++ Edge CFGEdge = std::make_pair(Latch->getLabel(), Succ->getLabel()); ++ BackEdges.insert(CFGEdge); ++ } ++ } ++ } ++ ++ // Find exit edges. 
++ SmallVector AuxExitEdges; ++ Loop->getExitEdges(AuxExitEdges); ++ for (BinaryLoop::Edge &Exit : AuxExitEdges) { ++ ExitEdges.insert(Exit); ++ } ++ } ++} ++ ++void StaticBranchInfo::findBasicBlockInfo(const BinaryFunction &Function, ++ BinaryContext &BC) { ++ for (auto &BB : Function) { ++ for (auto &Inst : BB) { ++ if (BC.MIB->isCall(Inst)) ++ CallSet.insert(&BB); ++ else if (BC.MIB->isStore(Inst)) ++ StoreSet.insert(&BB); ++ } ++ } ++} ++ ++bool StaticBranchInfo::isBackEdge(const Edge &CFGEdge) const { ++ return BackEdges.count(CFGEdge); ++} ++ ++bool StaticBranchInfo::isBackEdge(const BinaryBasicBlock *SrcBB, ++ const BinaryBasicBlock *DstBB) const { ++ const Edge CFGEdge = std::make_pair(SrcBB->getLabel(), DstBB->getLabel()); ++ return isBackEdge(CFGEdge); ++} ++ ++bool StaticBranchInfo::isExitEdge(const BinaryLoop::Edge &CFGEdge) const { ++ return ExitEdges.count(CFGEdge); ++} ++ ++bool StaticBranchInfo::isExitEdge(const BinaryBasicBlock *SrcBB, ++ const BinaryBasicBlock *DstBB) const { ++ const BinaryLoop::Edge CFGEdge = ++ std::make_pair(const_cast(SrcBB), ++ const_cast(DstBB)); ++ return isExitEdge(CFGEdge); ++} ++ ++bool StaticBranchInfo::isLoopHeader(const BinaryBasicBlock *BB) const { ++ return LoopHeaders.count(BB); ++} ++ ++bool StaticBranchInfo::hasCallInst(const BinaryBasicBlock *BB) const { ++ return CallSet.count(BB); ++} ++ ++bool StaticBranchInfo::hasStoreInst(const BinaryBasicBlock *BB) const { ++ return StoreSet.count(BB); ++} ++ ++unsigned StaticBranchInfo::countBackEdges(BinaryBasicBlock *BB) const { ++ unsigned CountEdges = 0; ++ ++ for (BinaryBasicBlock *SuccBB : BB->successors()) { ++ const Edge CFGEdge = std::make_pair(BB->getLabel(), SuccBB->getLabel()); ++ if (BackEdges.count(CFGEdge)) ++ ++CountEdges; ++ } ++ ++ return CountEdges; ++} ++ ++unsigned StaticBranchInfo::countExitEdges(BinaryBasicBlock *BB) const { ++ unsigned CountEdges = 0; ++ ++ for (BinaryBasicBlock *SuccBB : BB->successors()) { ++ const BinaryLoop::Edge CFGEdge = 
std::make_pair(BB, SuccBB); ++ if (ExitEdges.count(CFGEdge)) ++ ++CountEdges; ++ } ++ ++ return CountEdges; ++} ++ ++void StaticBranchInfo::clear() { ++ LoopHeaders.clear(); ++ BackEdges.clear(); ++ ExitEdges.clear(); ++ CallSet.clear(); ++ StoreSet.clear(); ++} ++ ++} // namespace bolt ++} // namespace llvm +diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp +index 0e12e8cb3..447b71fe7 100644 +--- a/bolt/lib/Profile/DataReader.cpp ++++ b/bolt/lib/Profile/DataReader.cpp +@@ -12,13 +12,16 @@ + //===----------------------------------------------------------------------===// + + #include "bolt/Profile/DataReader.h" ++#include "bolt/Passes/FeatureMiner.h" + #include "bolt/Core/BinaryFunction.h" + #include "bolt/Passes/MCF.h" + #include "bolt/Utils/Utils.h" + #include "llvm/Support/CommandLine.h" + #include "llvm/Support/Debug.h" + #include "llvm/Support/Errc.h" ++#include + #include ++#include + + #undef DEBUG_TYPE + #define DEBUG_TYPE "bolt-prof" +@@ -26,15 +29,23 @@ + using namespace llvm; + + namespace opts { +- ++extern cl::opt BlockCorrection; + extern cl::OptionCategory BoltCategory; + extern llvm::cl::opt Verbosity; + +-static cl::opt +-DumpData("dump-data", +- cl::desc("dump parsed bolt data for debugging"), +- cl::Hidden, +- cl::cat(BoltCategory)); ++static cl::opt InputModelFilename("model-path", ++ cl::desc(""), ++ cl::Optional, ++ cl::cat(BoltCategory)); ++ ++static cl::opt AnnotateThreshold( ++ "annotate-threshold", ++ cl::desc(""), ++ cl::init(0.85f), cl::Optional, cl::cat(BoltCategory)); ++ ++static cl::opt DumpData("dump-data", ++ cl::desc("dump parsed bolt data for debugging"), ++ cl::Hidden, cl::cat(BoltCategory)); + + } // namespace opts + +@@ -311,6 +322,17 @@ Error DataReader::readProfilePreCFG(BinaryContext &BC) { + } + + Error DataReader::readProfile(BinaryContext &BC) { ++ ++ if (opts::BlockCorrection) { ++ if (opts::InputModelFilename.empty()) { ++ outs() << "error: llvm-bolt expected -model-path= option.\n"; ++ 
exit(1); ++ } else { ++ DataReader::initializeONNXRunner(opts::InputModelFilename); ++ DataReader::setThreshold(opts::AnnotateThreshold); ++ } ++ } ++ + for (auto &BFI : BC.getBinaryFunctions()) { + BinaryFunction &Function = BFI.second; + readProfile(Function); +@@ -324,6 +346,12 @@ Error DataReader::readProfile(BinaryContext &BC) { + } + BC.setNumUnusedProfiledObjects(NumUnused); + ++ if (opts::BlockCorrection) { ++ uint64_t modified_total = DataReader::getModifiedBBTotal(); ++ outs() << "BOLT-INFO: total modified CFG BB count number is " ++ << modified_total << ".\n"; ++ } ++ + return Error::success(); + } + +@@ -555,6 +583,75 @@ float DataReader::evaluateProfileData(BinaryFunction &BF, + return MatchRatio; + } + ++void generateChildrenParentCount(BinaryBasicBlock *BB) { ++ typedef GraphTraits GraphT; ++ ++ for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB), ++ E = GraphT::child_end(BB); ++ CI != E; ++CI) { ++ typename GraphT::NodeRef Child = *CI; ++ BB->insertChildrenSet(Child); ++ Child->insertParentSet(BB); ++ } ++} ++ ++void generateChildrenParentCount(BinaryFunction &BF) { ++ for (BinaryBasicBlock &BB : BF) { ++ generateChildrenParentCount(&BB); ++ } ++} ++ ++uint64_t estimateBBCount(DataReader *dataReaderRef, BinaryBasicBlock *BB, ++ float threshold) { ++ uint64_t modified = 0; ++ if (BB->getExecutionCount() != 0) { ++ return modified; ++ } ++ ++ std::vector input_string; ++ std::vector input_int64; ++ std::vector input_float; ++ ++ BinaryBasicBlockFeature BBF = BB->getFeatures(); ++ input_int64 = BBF.getInferenceFeatures(); ++ ++ if (input_int64.empty()) { ++ return 0; ++ } ++ ++ float model_pred = ++ dataReaderRef->ONNXInference(input_string, input_int64, input_float); ++ if (model_pred >= threshold) { ++ uint64_t min_neighbor_count = std::numeric_limits::max(); ++ for (BinaryBasicBlock *parent : BB->getParentSet()) { ++ if (parent->getExecutionCount() > 0 && ++ parent->getExecutionCount() < min_neighbor_count) ++ min_neighbor_count = 
parent->getExecutionCount(); ++ } ++ for (BinaryBasicBlock *child : BB->getChildrenSet()) { ++ if (child->getExecutionCount() > 0 && ++ child->getExecutionCount() < min_neighbor_count) ++ min_neighbor_count = child->getExecutionCount(); ++ } ++ if (min_neighbor_count != std::numeric_limits::max()) { ++ BB->setExecutionCount(min_neighbor_count); ++ modified = 1; ++ } ++ } ++ return modified; ++} ++ ++uint64_t estimateBBCount(DataReader *dataReaderRef, BinaryFunction &BF, ++ float threshold) { ++ uint64_t modified_total_func = 0; ++ const auto &Order = BF.dfs(); ++ for (auto *BBA : Order) { ++ auto &BB = *BBA; ++ modified_total_func += estimateBBCount(dataReaderRef, &BB, threshold); ++ } ++ return modified_total_func; ++} ++ + void DataReader::readSampleData(BinaryFunction &BF) { + FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames()); + if (!SampleDataOrErr) +@@ -600,6 +697,17 @@ void DataReader::readSampleData(BinaryFunction &BF) { + + BF.ExecutionCount = TotalEntryCount; + ++ if (opts::BlockCorrection) { ++ generateChildrenParentCount(BF); ++ std::unique_ptr FM = ++ std::make_unique(opts::BlockCorrection); ++ FM->inferenceFeatures(BF); ++ ++ float threshold = DataReader::getThreshold(); ++ uint64_t modified_total_func = estimateBBCount(this, BF, threshold); ++ DataReader::addModifiedBBTotal(modified_total_func); ++ } ++ + estimateEdgeCounts(BF); + } + +diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp +index c6ea0b009..4191e18bd 100644 +--- a/bolt/lib/Rewrite/RewriteInstance.cpp ++++ b/bolt/lib/Rewrite/RewriteInstance.cpp +@@ -106,6 +106,12 @@ cl::opt DumpDotAll( + "enable '-print-loops' for color-coded blocks"), + cl::Hidden, cl::cat(BoltCategory)); + ++cl::opt BlockCorrection( ++ "block-correction", ++ cl::desc("capture features useful for ML model to inference the count on the binary basic block" ++ " and correct them on CFG."), ++ cl::ZeroOrMore, cl::cat(BoltOptCategory)); ++ + static cl::list + 
ForceFunctionNames("funcs", + cl::CommaSeparated, +-- +2.39.3 (Apple Git-146) + diff --git a/llvm-bolt.spec b/llvm-bolt.spec index 1be40dc137f07cbb590ff7f3c7738f40bd559ee3..fd0a8f9ba36ab0e9603f87a5a75940abdf9a1c87 100644 --- a/llvm-bolt.spec +++ b/llvm-bolt.spec @@ -22,7 +22,7 @@ Name: %{pkg_name} Version: %{bolt_version} -Release: 1 +Release: 7 Summary: BOLT is a post-link optimizer developed to speed up large applications License: Apache 2.0 URL: https://github.com/llvm/llvm-project/tree/main/bolt @@ -30,10 +30,12 @@ URL: https://github.com/llvm/llvm-project/tree/main/bolt Source0: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{bolt_version}/%{bolt_srcdir}.tar.xz Source1: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{bolt_version}/%{bolt_srcdir}.tar.xz.sig -# BOLT is not respecting the component split of LLVM and requires some private -# headers in order to compile itself. Try to disable as much libraries as -# possible in order to reduce build time. -#Patch0: rm-llvm-libs.diff +Patch1: 0001-Fix-trap-value-for-non-X86.patch +Patch2: 0002-Add-test-for-emitting-trap-value.patch +Patch3: 0003-AArch64-Add-AArch64-support-for-inline.patch +Patch4: 0004-Bolt-Solving-pie-support-issue.patch +Patch5: 0005-BOLT-AArch64-Don-t-change-layout-in-PatchEntries.patch +Patch6: 0006-AArch64-Add-CFG-block-count-correction-optimization.patch BuildRequires: gcc BuildRequires: gcc-c++ @@ -84,7 +86,6 @@ Documentation for the BOLT optimizer -DLLVM_TARGETS_TO_BUILD="AArch64" %endif - # Set LD_LIBRARY_PATH now because we skip rpath generation and the build uses # some just built libraries. export LD_LIBRARY_PATH=%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib} @@ -104,7 +105,6 @@ find %{buildroot}%{install_prefix} \ ! -name "libbolt_rt_instr.a" \ -type f,l -exec rm -f '{}' \; - # Remove files installed during the build phase. 
rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}/lib*.a @@ -120,7 +120,7 @@ mv bolt/README.md bolt/docs/*.md %{buildroot}%{install_docdir} rm bolt/test/cache+-deprecated.test bolt/test/bolt-icf.test bolt/test/R_ABS.pic.lld.cpp %endif -export LD_LIBRARY_PATH=%{_builddir}/%{bolt_srcdir}//%{_vpath_builddir}/%{_lib} +export LD_LIBRARY_PATH=%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib} export DESTDIR=%{buildroot} %ninja_build check-bolt @@ -146,16 +146,48 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}/lib*.a %files doc %doc %{install_docdir} - %changelog -* Mon Dec 4 2023 zhoujing 17.0.6-1 -- Update to 17.0.6 +* Fri Jul 12 2024 rfwang07 17.0.6-7 +- Type:Feature +- ID:NA +- SUG:NA +- DESC: Add CFG block count correction optimization. + +* Fri Jun 21 2024 rfwang07 17.0.6-6 +- Type:Backport +- ID:NA +- SUG:NA +- DESC: Backport bugfix. + +* Tue Jun 18 2024 Xiong Zhou 17.0.6-5 +- Type:Feature +- ID:NA +- SUG:NA +- DESC: Add AArch64 support for inline. + +* Tue Jun 18 2024 Xiong Zhou 17.0.6-4 +- Type:Backport +- ID:NA +- SUG:NA +- DESC: Backport bugfix. + +* Tue Jun 18 2024 Xiong Zhou 17.0.6-3 +- Type:Update +- ID:NA +- SUG:NA +- DESC: Update to version 17.0.6 + +* Thu Sep 7 2023 Xiong Zhou 15.0.7-2 +- Type:Update +- ID:NA +- SUG:NA +- DESC: Backport bugfix and add AArch64 support for hugify and inline. -* Thu Jun 15 2023 Xiong Zhou 0-2.20211016.gitb72f753 -- Type:backport +* Thu Aug 31 2023 zhenyu zhao 15.0.7-1 +- Type:Update - ID:NA - SUG:NA -- DESC: Handle data at the beginning of a function when disassembling and building CFG. +- DESC: Update llvm-bolt from llvm-bolt to llvm-bolt-15 * Mon Dec 19 2022 liyancheng <412998149@qq.com> 0-1.20211016.gitb72f753 - Type:fix @@ -167,4 +199,4 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}/lib*.a - Type:Init - ID:NA - SUG:NA -- DESC:Init llvm-bolt repository \ No newline at end of file +- DESC:Init llvm-bolt repository