diff --git a/0001-AArch64-fix-bug-55005-handle-DW_CFA_GNU_NegateRAState.patch b/0001-AArch64-fix-bug-55005-handle-DW_CFA_GNU_NegateRAState.patch deleted file mode 100644 index 15398a9058bdbc3a6811dc4649e540ade6789654..0000000000000000000000000000000000000000 --- a/0001-AArch64-fix-bug-55005-handle-DW_CFA_GNU_NegateRAState.patch +++ /dev/null @@ -1,135 +0,0 @@ -From c62ab1487115a74d72ad23fd89b42076d5726bde Mon Sep 17 00:00:00 2001 -From: xiongzhou4 -Date: Mon, 24 Jul 2023 19:47:46 +0800 -Subject: [PATCH] [AArch64] fix bug #55005 handle DW_CFA_GNU_NegateRAState. - backport: https://reviews.llvm.org/rG9921197920fc3e9ad9605bd8fe0e835ca2dd41a5 - ---- - bolt/lib/Core/Exceptions.cpp | 19 ++++-- - .../Inputs/dw_cfa_gnu_window_save.yaml | 62 +++++++++++++++++++ - bolt/test/AArch64/dw_cfa_gnu_window_save.test | 8 +++ - 3 files changed, 83 insertions(+), 6 deletions(-) - create mode 100644 bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml - create mode 100644 bolt/test/AArch64/dw_cfa_gnu_window_save.test - -diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp -index 79404ca87..b0aa8b990 100644 ---- a/bolt/lib/Core/Exceptions.cpp -+++ b/bolt/lib/Core/Exceptions.cpp -@@ -644,18 +644,25 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { - errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n"; - return false; - case DW_CFA_GNU_window_save: -+ // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same -+ // id but mean different things. The latter is used in AArch64. -+ if (Function.getBinaryContext().isAArch64()) { -+ Function.addCFIInstruction( -+ Offset, MCCFIInstruction::createNegateRAState(nullptr)); -+ break; -+ } -+ if (opts::Verbosity >= 1) -+ errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n"; -+ return false; - case DW_CFA_lo_user: - case DW_CFA_hi_user: -- if (opts::Verbosity >= 1) { -- errs() << "BOLT-WARNING: DW_CFA_GNU_* and DW_CFA_*_user " -- "unimplemented\n"; -- } -+ if (opts::Verbosity >= 1) -+ errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n"; - return false; - default: -- if (opts::Verbosity >= 1) { -+ if (opts::Verbosity >= 1) - errs() << "BOLT-WARNING: Unrecognized CFI instruction: " << Instr.Opcode - << '\n'; -- } - return false; - } - -diff --git a/bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml b/bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml -new file mode 100644 -index 000000000..faa32e089 ---- /dev/null -+++ b/bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml -@@ -0,0 +1,62 @@ -+--- !ELF -+FileHeader: -+ Class: ELFCLASS64 -+ Data: ELFDATA2LSB -+ Type: ET_EXEC -+ Machine: EM_AARCH64 -+ Entry: 0x4100C0 -+ProgramHeaders: -+ - Type: PT_LOAD -+ Flags: [ PF_X, PF_R ] -+ FirstSec: .init -+ LastSec: .fini -+ VAddr: 0x410000 -+ Align: 0x10000 -+Sections: -+ - Name: .init -+ Type: SHT_PROGBITS -+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ] -+ Address: 0x410000 -+ AddressAlign: 0x4 -+ Offset: 0x10000 -+ Content: 3F2303D5FD7BBFA9FD0300913F000094FD7BC1A8BF2303D5C0035FD6 -+ - Name: .plt -+ Type: SHT_PROGBITS -+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ] -+ Address: 0x410020 -+ AddressAlign: 0x10 -+ Content: F07BBFA9700100F011FE47F910E23F9120021FD61F2003D51F2003D51F2003D590010090110240F91002009120021FD690010090110640F91022009120021FD690010090110A40F91042009120021FD6 -+ - Name: .text -+ Type: SHT_PROGBITS -+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ] -+ Address: 0x410080 -+ AddressAlign: 0x40 -+ Content: 00008052C0035FD61F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D55F2403D51D0080D21E0080D2E50300AAE10340F9E2230091E60300910000009000D00391030080D2040080D2D5FFFF97D8FFFF975F2403D5E2FFFF171F2003D55F2403D5C0035FD6600100F000F047F9400000B4D3FFFF17C0035FD61F2003D5800100908101009000800091218000913F0000EBC000005481000090210840F9610000B4F00301AA00021FD6C0035FD680010090810100900080009121800091210000CB22FC7FD3410C818B21FC4193C10000B482000090420C40F9620000B4F00302AA00021FD6C0035FD63F2303D5FD7BBEA9FD030091F30B00F9930100906082403980000035DEFFFF972000805260820039F30B40F9FD7BC2A8BF2303D5C0035FD65F2403D5E2FFFF17 -+ - Name: .fini -+ Type: SHT_PROGBITS -+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ] -+ Address: 0x4101CC -+ AddressAlign: 0x4 -+ Content: 3F2303D5FD7BBFA9FD030091FD7BC1A8BF2303D5C0035FD6 -+ - Name: .eh_frame -+ Type: SHT_PROGBITS -+ Flags: [ SHF_ALLOC ] -+ Address: 0x420068 -+ AddressAlign: 0x8 -+ Content: 1000000000000000017A520004781E011B0C1F0010000000180000003C00FFFF3C0000000041071E140000002C0000006800FFFF08000000000000000000000010000000440000007000FFFF300000000000000010000000580000008C00FFFF3C00000000000000240000006C000000B400FFFF3800000000412D410E209D049E0342930248DEDDD30E00412D0000001400000094000000C400FFFF08000000000000000000000010000000AC00000068FFFEFF080000000000000000000000 -+ - Name: .rela.text -+ Type: SHT_RELA -+ Flags: [ SHF_INFO_LINK ] -+ Link: .symtab -+ AddressAlign: 0x8 -+ Info: .text -+Symbols: -+ - Name: .text -+ Type: STT_SECTION -+ Section: .text -+ Value: 0x410080 -+ - Name: _ITM_deregisterTMCloneTable -+ Binding: STB_WEAK -+ - Name: _ITM_registerTMCloneTable -+ Binding: STB_WEAK -+... -diff --git a/bolt/test/AArch64/dw_cfa_gnu_window_save.test b/bolt/test/AArch64/dw_cfa_gnu_window_save.test -new file mode 100644 -index 000000000..2e044b399 ---- /dev/null -+++ b/bolt/test/AArch64/dw_cfa_gnu_window_save.test -@@ -0,0 +1,8 @@ -+# Check that llvm-bolt can handle DW_CFA_GNU_window_save on AArch64. -+ -+RUN: yaml2obj %p/Inputs/dw_cfa_gnu_window_save.yaml &> %t.exe -+RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s -+ -+CHECK-NOT: paciasp -+CHECK-NOT: autiasp -+CHECK-NOT: ERROR: unable to fill CFI. --- -2.33.0 - diff --git a/0001-Fix-trap-value-for-non-X86.patch b/0001-Fix-trap-value-for-non-X86.patch new file mode 100644 index 0000000000000000000000000000000000000000..83542e4a2372b0dafd3cff6cb91677e6335a9e1d --- /dev/null +++ b/0001-Fix-trap-value-for-non-X86.patch @@ -0,0 +1,126 @@ +From 868d8c360b3e1e5f291cb3e0dae0777a4529228f Mon Sep 17 00:00:00 2001 +From: Denis Revunov +Date: Thu, 27 Jul 2023 11:48:08 -0400 +Subject: [PATCH] Fix trap value for non-X86 + +The trap value used by BOLT was assumed to be single-byte instruction. +It made some functions unaligned on AArch64(e.g exceptions-instrumentation test) +and caused emission failures. Fix that by changing fill value to StringRef. + +Reviewed By: rafauler + +Differential Revision: https://reviews.llvm.org/D158191 +--- + bolt/include/bolt/Core/MCPlusBuilder.h | 9 ++++++--- + bolt/lib/Core/BinaryEmitter.cpp | 4 ++-- + bolt/lib/Rewrite/RewriteInstance.cpp | 6 ++++-- + bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 4 ++++ + bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp | 4 ++++ + bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 2 +- + 6 files changed, 21 insertions(+), 8 deletions(-) + +diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h +index 56d0228cd..beb06751d 100644 +--- a/bolt/include/bolt/Core/MCPlusBuilder.h ++++ b/bolt/include/bolt/Core/MCPlusBuilder.h +@@ -636,9 +636,12 @@ public: + return false; + } + +- /// If non-zero, this is used to fill the executable space with instructions +- /// that will trap. Defaults to 0. +- virtual unsigned getTrapFillValue() const { return 0; } ++ /// Used to fill the executable space with instructions ++ /// that will trap. ++ virtual StringRef getTrapFillValue() const { ++ llvm_unreachable("not implemented"); ++ return StringRef(); ++ } + + /// Interface and basic functionality of a MCInstMatcher. The idea is to make + /// it easy to match one or more MCInsts against a tree-like pattern and +diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp +index c4129615a..df076c81d 100644 +--- a/bolt/lib/Core/BinaryEmitter.cpp ++++ b/bolt/lib/Core/BinaryEmitter.cpp +@@ -376,7 +376,7 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, + } + + if (opts::MarkFuncs) +- Streamer.emitIntValue(BC.MIB->getTrapFillValue(), 1); ++ Streamer.emitBytes(BC.MIB->getTrapFillValue()); + + // Emit CFI end + if (Function.hasCFI()) +@@ -420,7 +420,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF, + // case, the call site entries in that LSDA have 0 as offset to the landing + // pad, which the runtime interprets as "no handler". To prevent this, + // insert some padding. +- Streamer.emitIntValue(BC.MIB->getTrapFillValue(), 1); ++ Streamer.emitBytes(BC.MIB->getTrapFillValue()); + } + + // Track the first emitted instruction with debug info. +diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp +index fe8c134b8..c6ea0b009 100644 +--- a/bolt/lib/Rewrite/RewriteInstance.cpp ++++ b/bolt/lib/Rewrite/RewriteInstance.cpp +@@ -5273,8 +5273,10 @@ void RewriteInstance::rewriteFile() { + if (!BF.getFileOffset() || !BF.isEmitted()) + continue; + OS.seek(BF.getFileOffset()); +- for (unsigned I = 0; I < BF.getMaxSize(); ++I) +- OS.write((unsigned char)BC->MIB->getTrapFillValue()); ++ StringRef TrapInstr = BC->MIB->getTrapFillValue(); ++ unsigned NInstr = BF.getMaxSize() / TrapInstr.size(); ++ for (unsigned I = 0; I < NInstr; ++I) ++ OS.write(TrapInstr.data(), TrapInstr.size()); + } + OS.seek(SavedPos); + } +diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +index acf21ba23..cd66b654e 100644 +--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp ++++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +@@ -1142,6 +1142,10 @@ public: + } + } + ++ StringRef getTrapFillValue() const override { ++ return StringRef("\0\0\0\0", 4); ++ } ++ + bool createReturn(MCInst &Inst) const override { + Inst.setOpcode(AArch64::RET); + Inst.clear(); +diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp +index ec5bca852..badc1bde8 100644 +--- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp ++++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp +@@ -171,6 +171,10 @@ public: + return true; + } + ++ StringRef getTrapFillValue() const override { ++ return StringRef("\0\0\0\0", 4); ++ } ++ + bool analyzeBranch(InstructionIterator Begin, InstructionIterator End, + const MCSymbol *&TBB, const MCSymbol *&FBB, + MCInst *&CondBranch, +diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +index 3ee161d0b..5e3c01a1c 100644 +--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp ++++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +@@ -397,7 +397,7 @@ public: + } + } + +- unsigned getTrapFillValue() const override { return 0xCC; } ++ StringRef getTrapFillValue() const override { return StringRef("\314", 1); } + + struct IndJmpMatcherFrag1 : MCInstMatcher { + std::unique_ptr Base; +-- +2.33.0 + diff --git a/0002-AArch64-Add-AArch64-support-for-hugify.patch b/0002-AArch64-Add-AArch64-support-for-hugify.patch deleted file mode 100644 index b4adef69537cb26bc3d88f7b7ef29667b0b4a52a..0000000000000000000000000000000000000000 --- a/0002-AArch64-Add-AArch64-support-for-hugify.patch +++ /dev/null @@ -1,465 +0,0 @@ -From 81a80dbe9f47f728bc593d05cd5708a653a23f1c Mon Sep 17 00:00:00 2001 -From: xiongzhou4 -Date: Mon, 11 Sep 2023 11:33:41 +0800 -Subject: [PATCH] [AArch64] Add AArch64 support for hugify. - ---- - bolt/CMakeLists.txt | 4 +- - bolt/runtime/CMakeLists.txt | 28 ++- - bolt/runtime/common.h | 224 ++++++++++++++++++ - bolt/runtime/hugify.cpp | 21 +- - .../AArch64/Inputs/user_func_order.txt | 2 + - bolt/test/runtime/AArch64/user-func-reorder.c | 44 ++++ - 6 files changed, 305 insertions(+), 18 deletions(-) - create mode 100644 bolt/test/runtime/AArch64/Inputs/user_func_order.txt - create mode 100644 bolt/test/runtime/AArch64/user-func-reorder.c - -diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt -index a97878cd3..3de930496 100644 ---- a/bolt/CMakeLists.txt -+++ b/bolt/CMakeLists.txt -@@ -5,7 +5,7 @@ set(BOLT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) - set(CMAKE_CXX_STANDARD 14) - - set(BOLT_ENABLE_RUNTIME OFF) --if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") -+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|aarch64") - set(BOLT_ENABLE_RUNTIME ON) - endif() - -@@ -45,7 +45,7 @@ if (LLVM_INCLUDE_TESTS) - endif() - - if (BOLT_ENABLE_RUNTIME) -- message(STATUS "Building BOLT runtime libraries for X86") -+ message(STATUS "Building BOLT runtime libraries") - ExternalProject_Add(bolt_rt - SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/runtime" - STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-stamps -diff --git a/bolt/runtime/CMakeLists.txt b/bolt/runtime/CMakeLists.txt -index 7c1b79af4..ee6ab7bd4 100644 ---- a/bolt/runtime/CMakeLists.txt -+++ b/bolt/runtime/CMakeLists.txt -@@ -10,10 +10,12 @@ check_include_files(elf.h HAVE_ELF_H) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in - ${CMAKE_CURRENT_BINARY_DIR}/config.h) - --add_library(bolt_rt_instr STATIC -- instr.cpp -- ${CMAKE_CURRENT_BINARY_DIR}/config.h -- ) -+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") -+ add_library(bolt_rt_instr STATIC -+ instr.cpp -+ ${CMAKE_CURRENT_BINARY_DIR}/config.h -+ ) -+endif() - add_library(bolt_rt_hugify STATIC - hugify.cpp - ${CMAKE_CURRENT_BINARY_DIR}/config.h -@@ -23,16 +25,24 @@ set(BOLT_RT_FLAGS - -ffreestanding - -fno-exceptions - -fno-rtti -- -fno-stack-protector -- -mno-sse) -+ -fno-stack-protector) -+ -+# x86 exclusive option -+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") -+ list(APPEND BOLT_RT_FLAGS -mno-sse) -+endif() - - # Don't let the compiler think it can create calls to standard libs --target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS} -fPIE) --target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") -+ target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS} -fPIE) -+ target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -+endif() - target_compile_options(bolt_rt_hugify PRIVATE ${BOLT_RT_FLAGS}) - target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) - --install(TARGETS bolt_rt_instr DESTINATION lib) -+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") -+ install(TARGETS bolt_rt_instr DESTINATION lib) -+endif() - install(TARGETS bolt_rt_hugify DESTINATION lib) - - if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*") -diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h -index 008dbb6c3..6869742e7 100644 ---- a/bolt/runtime/common.h -+++ b/bolt/runtime/common.h -@@ -39,6 +39,45 @@ typedef int int32_t; - #endif - - // Save all registers while keeping 16B stack alignment -+#if defined (__aarch64__) -+#define SAVE_ALL \ -+ "stp x0, x1, [sp, #-16]!\n" \ -+ "stp x2, x3, [sp, #-16]!\n" \ -+ "stp x4, x5, [sp, #-16]!\n" \ -+ "stp x6, x7, [sp, #-16]!\n" \ -+ "stp x8, x9, [sp, #-16]!\n" \ -+ "stp x10, x11, [sp, #-16]!\n" \ -+ "stp x12, x13, [sp, #-16]!\n" \ -+ "stp x14, x15, [sp, #-16]!\n" \ -+ "stp x16, x17, [sp, #-16]!\n" \ -+ "stp x18, x19, [sp, #-16]!\n" \ -+ "stp x20, x21, [sp, #-16]!\n" \ -+ "stp x22, x23, [sp, #-16]!\n" \ -+ "stp x24, x25, [sp, #-16]!\n" \ -+ "stp x26, x27, [sp, #-16]!\n" \ -+ "stp x28, x29, [sp, #-16]!\n" \ -+ "stp x30, xzr, [sp, #-16]!\n" -+ -+// Mirrors SAVE_ALL -+#define RESTORE_ALL \ -+ "ldp x30, xzr, [sp], #16\n" \ -+ "ldp x28, x29, [sp], #16\n" \ -+ "ldp x26, x27, [sp], #16\n" \ -+ "ldp x24, x25, [sp], #16\n" \ -+ "ldp x22, x23, [sp], #16\n" \ -+ "ldp x20, x21, [sp], #16\n" \ -+ "ldp x18, x19, [sp], #16\n" \ -+ "ldp x16, x17, [sp], #16\n" \ -+ "ldp x14, x15, [sp], #16\n" \ -+ "ldp x12, x13, [sp], #16\n" \ -+ "ldp x10, x11, [sp], #16\n" \ -+ "ldp x8, x9, [sp], #16\n" \ -+ "ldp x6, x7, [sp], #16\n" \ -+ "ldp x4, x5, [sp], #16\n" \ -+ "ldp x2, x3, [sp], #16\n" \ -+ "ldp x0, x1, [sp], #16\n" -+ -+#else - #define SAVE_ALL \ - "push %%rax\n" \ - "push %%rbx\n" \ -@@ -75,6 +114,7 @@ typedef int int32_t; - "pop %%rcx\n" \ - "pop %%rbx\n" \ - "pop %%rax\n" -+#endif - - // Functions that are required by freestanding environment. Compiler may - // generate calls to these implicitly. -@@ -129,6 +169,189 @@ constexpr uint32_t BufSize = 10240; - #define _STRINGIFY(x) #x - #define STRINGIFY(x) _STRINGIFY(x) - -+#if defined (__aarch64__) -+// Declare some syscall wrappers we use throughout this code to avoid linking -+// against system libc. -+uint64_t __read(uint64_t fd, const void *buf, uint64_t count) { -+ uint64_t ret; -+ register uint64_t x0 __asm__("x0") = fd; -+ register const void *x1 __asm__("x1") = buf; -+ register uint64_t x2 __asm__("x2") = count; -+ register uint32_t w8 __asm__("w8") = 63; -+ __asm__ __volatile__("svc #0\n" -+ "mov %0, x0" -+ : "=r"(ret), "+r"(x0), "+r"(x1) -+ : "r"(x2), "r"(w8) -+ : "cc", "memory"); -+ return ret; -+} -+ -+uint64_t __write(uint64_t fd, const void *buf, uint64_t count) { -+ uint64_t ret; -+ register uint64_t x0 __asm__("x0") = fd; -+ register const void *x1 __asm__("x1") = buf; -+ register uint64_t x2 __asm__("x2") = count; -+ register uint32_t w8 __asm__("w8") = 64; -+ __asm__ __volatile__("svc #0\n" -+ "mov %0, x0" -+ : "=r"(ret), "+r"(x0), "+r"(x1) -+ : "r"(x2), "r"(w8) -+ : "cc", "memory"); -+ return ret; -+} -+ -+void *__mmap(uint64_t addr, uint64_t size, uint64_t prot, uint64_t flags, -+ uint64_t fd, uint64_t offset) { -+ void *ret; -+ register uint64_t x0 __asm__("x0") = addr; -+ register uint64_t x1 __asm__("x1") = size; -+ register uint64_t x2 __asm__("x2") = prot; -+ register uint64_t x3 __asm__("x3") = flags; -+ register uint64_t x4 __asm__("x4") = fd; -+ register uint64_t x5 __asm__("x5") = offset; -+ register uint32_t w8 __asm__("w8") = 222; -+ __asm__ __volatile__("svc #0\n" -+ "mov %0, x0" -+ : "=r"(ret), "+r"(x0), "+r"(x1) -+ : "r"(x2), "r"(x3), "r"(x4), "r"(x5), "r"(w8) -+ : "cc", "memory"); -+ return ret; -+} -+ -+uint64_t __munmap(void *addr, uint64_t size) { -+ uint64_t ret; -+ register void *x0 __asm__("x0") = addr; -+ register uint64_t x1 __asm__("x1") = size; -+ register uint32_t w8 __asm__("w8") = 215; -+ __asm__ __volatile__("svc #0\n" -+ "mov %0, x0" -+ : "=r"(ret), "+r"(x0), "+r"(x1) -+ : "r"(w8) -+ : "cc", "memory"); -+ return ret; -+} -+ -+uint64_t __exit(uint64_t code) { -+ uint64_t ret; -+ register uint64_t x0 __asm__("x0") = code; -+ register uint32_t w8 __asm__("w8") = 94; -+ __asm__ __volatile__("svc #0\n" -+ "mov %0, x0" -+ : "=r"(ret), "+r"(x0) -+ : "r"(w8) -+ : "cc", "memory", "x1"); -+ return ret; -+} -+ -+uint64_t __open(const char *pathname, uint64_t flags, uint64_t mode) { -+ uint64_t ret; -+ register int x0 __asm__("x0") = -100; -+ register const char *x1 __asm__("x1") = pathname; -+ register uint64_t x2 __asm__("x2") = flags; -+ register uint64_t x3 __asm__("x3") = mode; -+ register uint32_t w8 __asm__("w8") = 56; -+ __asm__ __volatile__("svc #0\n" -+ "mov %0, x0" -+ : "=r"(ret), "+r"(x0), "+r"(x1) -+ : "r"(x2), "r"(x3), "r"(w8) -+ : "cc", "memory"); -+ return ret; -+} -+ -+int __madvise(void *addr, size_t length, int advice) { -+ int ret; -+ register void *x0 __asm__("x0") = addr; -+ register size_t x1 __asm__("x1") = length; -+ register int x2 __asm__("x2") = advice; -+ register uint32_t w8 __asm__("w8") = 233; -+ __asm__ __volatile__("svc #0\n" -+ "mov %w0, w0" -+ : "=r"(ret), "+r"(x0), "+r"(x1) -+ : "r"(x2), "r"(w8) -+ : "cc", "memory"); -+ return ret; -+} -+ -+int __mprotect(void *addr, size_t len, int prot) { -+ int ret; -+ register void *x0 __asm__("x0") = addr; -+ register size_t x1 __asm__("x1") = len; -+ register int x2 __asm__("x2") = prot; -+ register uint32_t w8 __asm__("w8") = 226; -+ __asm__ __volatile__("svc #0\n" -+ "mov %w0, w0" -+ : "=r"(ret), "+r"(x0), "+r"(x1) -+ : "r"(x2), "r"(w8) -+ : "cc", "memory"); -+ return ret; -+} -+ -+// Helper functions for writing strings to the .fdata file. We intentionally -+// avoid using libc names to make it clear it is our impl. -+ -+/// Compare two strings, at most Num bytes. -+int strnCmp(const char *Str1, const char *Str2, size_t Num) { -+ while (Num && *Str1 && (*Str1 == *Str2)) { -+ Num--; -+ Str1++; -+ Str2++; -+ } -+ if (Num == 0) -+ return 0; -+ return *(unsigned char *)Str1 - *(unsigned char *)Str2; -+} -+ -+uint32_t strLen(const char *Str) { -+ uint32_t Size = 0; -+ while (*Str++) -+ ++Size; -+ return Size; -+} -+ -+/// Write number Num using Base to the buffer in OutBuf, returns a pointer to -+/// the end of the string. -+char *intToStr(char *OutBuf, uint64_t Num, uint32_t Base) { -+ const char *Chars = "0123456789abcdef"; -+ char Buf[21]; -+ char *Ptr = Buf; -+ while (Num) { -+ *Ptr++ = *(Chars + (Num % Base)); -+ Num /= Base; -+ } -+ if (Ptr == Buf) { -+ *OutBuf++ = '0'; -+ return OutBuf; -+ } -+ while (Ptr != Buf) -+ *OutBuf++ = *--Ptr; -+ -+ return OutBuf; -+} -+ -+/// Copy Str to OutBuf, returns a pointer to the end of the copied string -+char *strCopy(char *OutBuf, const char *Str, int32_t Size = BufSize) { -+ while (*Str) { -+ *OutBuf++ = *Str++; -+ if (--Size <= 0) -+ return OutBuf; -+ } -+ return OutBuf; -+} -+ -+void reportNumber(const char *Msg, uint64_t Num, uint32_t Base) { -+ char Buf[BufSize]; -+ char *Ptr = Buf; -+ Ptr = strCopy(Ptr, Msg, BufSize - 23); -+ Ptr = intToStr(Ptr, Num, Base); -+ Ptr = strCopy(Ptr, "\n"); -+ __write(2, Buf, Ptr - Buf); -+} -+ -+void reportError(const char *Msg, uint64_t Size) { -+ __write(2, Msg, Size); -+ __exit(1); -+} -+#else - uint64_t __read(uint64_t fd, const void *buf, uint64_t count) { - uint64_t ret; - #if defined(__APPLE__) -@@ -550,5 +773,6 @@ public: - inline uint64_t alignTo(uint64_t Value, uint64_t Align) { - return (Value + Align - 1) / Align * Align; - } -+#endif - - } // anonymous namespace -diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp -index 69e1a7e06..385e4d147 100644 ---- a/bolt/runtime/hugify.cpp -+++ b/bolt/runtime/hugify.cpp -@@ -6,26 +6,25 @@ - // - //===----------------------------------------------------------------------===// - --#if defined (__x86_64__) - #if !defined(__APPLE__) - - #include "common.h" - #include - - // Enables a very verbose logging to stderr useful when debugging --//#define ENABLE_DEBUG -+// #define ENABLE_DEBUG - - // Function pointers to init routines in the binary, so we can resume - // regular execution of the function that we hooked. - extern void (*__bolt_hugify_init_ptr)(); - - // The __hot_start and __hot_end symbols set by Bolt. We use them to figure --// out the rage for marking huge pages. -+// out the range for marking huge pages. - extern uint64_t __hot_start; - extern uint64_t __hot_end; - - #ifdef MADV_HUGEPAGE --/// Check whether the kernel supports THP via corresponding sysfs entry. -+// Check whether the kernel supports THP via corresponding sysfs entry. - static bool has_pagecache_thp_support() { - char buf[256] = {0}; - const char *madviseStr = "always [madvise] never"; -@@ -116,14 +115,22 @@ extern "C" void __bolt_hugify_self_impl() { - #endif - } - --/// This is hooking ELF's entry, it needs to save all machine state. -+// This is hooking ELF's entry, it needs to save all machine state. - extern "C" __attribute((naked)) void __bolt_hugify_self() { -+#if defined (__x86_64__) - __asm__ __volatile__(SAVE_ALL - "call __bolt_hugify_self_impl\n" - RESTORE_ALL - "jmp *__bolt_hugify_init_ptr(%%rip)\n" - :::); --} -- -+#elif defined (__aarch64__) -+ __asm__ __volatile__(SAVE_ALL -+ "bl __bolt_hugify_self_impl\n" -+ RESTORE_ALL -+ "ldr x16, =__bolt_hugify_init_ptr\n" -+ "ldr x16, [x16]\n" -+ "br x16\n" -+ :::); - #endif -+} - #endif -diff --git a/bolt/test/runtime/AArch64/Inputs/user_func_order.txt b/bolt/test/runtime/AArch64/Inputs/user_func_order.txt -new file mode 100644 -index 000000000..48b76cd35 ---- /dev/null -+++ b/bolt/test/runtime/AArch64/Inputs/user_func_order.txt -@@ -0,0 +1,2 @@ -+main -+fib -diff --git a/bolt/test/runtime/AArch64/user-func-reorder.c b/bolt/test/runtime/AArch64/user-func-reorder.c -new file mode 100644 -index 000000000..fcb92bca1 ---- /dev/null -+++ b/bolt/test/runtime/AArch64/user-func-reorder.c -@@ -0,0 +1,44 @@ -+/* Checks that BOLT correctly processes a user-provided function list file, -+ * reorder functions according to this list, update hot_start and hot_end -+ * symbols and insert a function to perform hot text mapping during program -+ * startup. -+ */ -+#include -+ -+int foo(int x) { -+ return x + 1; -+} -+ -+int fib(int x) { -+ if (x < 2) -+ return x; -+ return fib(x - 1) + fib(x - 2); -+} -+ -+int bar(int x) { -+ return x - 1; -+} -+ -+int main(int argc, char **argv) { -+ printf("fib(%d) = %d\n", argc, fib(argc)); -+ return 0; -+} -+ -+/* -+REQUIRES: system-linux,bolt-runtime -+ -+RUN: %clang %cflags -no-pie %s -o %t.exe -Wl,-q -+ -+RUN: llvm-bolt %t.exe --relocs=1 --lite --reorder-functions=user \ -+RUN: --hugify --function-order=%p/Inputs/user_func_order.txt -o %t -+RUN: llvm-nm --numeric-sort --print-armap %t | \ -+RUN: FileCheck %s -check-prefix=CHECK-NM -+RUN: %t 1 2 3 | FileCheck %s -check-prefix=CHECK-OUTPUT -+ -+CHECK-NM: W __hot_start -+CHECK-NM: T main -+CHECK-NM-NEXT: T fib -+CHECK-NM-NEXT: W __hot_end -+ -+CHECK-OUTPUT: fib(4) = 3 -+*/ --- -2.33.0 - diff --git a/0002-Add-test-for-emitting-trap-value.patch b/0002-Add-test-for-emitting-trap-value.patch new file mode 100644 index 0000000000000000000000000000000000000000..8cc1c6d8308dc848072e0b6be000f9fc12c96068 --- /dev/null +++ b/0002-Add-test-for-emitting-trap-value.patch @@ -0,0 +1,44 @@ +From e4ae238a42296a84bc819dd1fb61f3c699952f17 Mon Sep 17 00:00:00 2001 +From: Denis Revunov +Date: Thu, 17 Aug 2023 18:30:07 +0300 +Subject: [PATCH] Add test for emitting trap value + +Reviewed By: rafauler + +Differential Revision: https://reviews.llvm.org/D158191 +--- + bolt/test/runtime/mark-funcs.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + create mode 100644 bolt/test/runtime/mark-funcs.c + +diff --git a/bolt/test/runtime/mark-funcs.c b/bolt/test/runtime/mark-funcs.c +new file mode 100644 +index 000000000..a8586ca8b +--- /dev/null ++++ b/bolt/test/runtime/mark-funcs.c +@@ -0,0 +1,22 @@ ++#include ++ ++int dummy() { ++ printf("Dummy called\n"); ++ return 0; ++} ++ ++int main(int argc, char **argv) { ++ if (dummy() != 0) ++ return 1; ++ printf("Main called\n"); ++ return 0; ++} ++// Check that emitting trap value works properly and ++// does not break functions ++// REQUIRES: system-linux ++// RUN: %clangxx -Wl,-q %s -o %t.exe ++// RUN: %t.exe | FileCheck %s ++// CHECK: Dummy called ++// CHECK-NEXT: Main called ++// RUN: llvm-bolt %t.exe -o %t.exe.bolt -lite=false --mark-funcs ++// RUN: %t.exe.bolt | FileCheck %s +-- +2.33.0 + diff --git a/0003-AArch64-Add-AArch64-support-for-inline.patch b/0003-AArch64-Add-AArch64-support-for-inline.patch index 2ce33e98d6389ccf22eeb5fb0fcf27d89e1f8efd..cb64595fbbeddb127f769c8facf7676b1178cdf9 100644 --- a/0003-AArch64-Add-AArch64-support-for-inline.patch +++ b/0003-AArch64-Add-AArch64-support-for-inline.patch @@ -1,259 +1,274 @@ -From b26ff1c328435d7b0ceccec1dcc25252821ad373 Mon Sep 17 00:00:00 2001 -From: xiongzhou4 -Date: Mon, 11 Sep 2023 14:43:12 +0800 -Subject: [PATCH] [AArch64] Add AArch64 support for inline. - ---- - bolt/lib/Passes/Inliner.cpp | 31 +++++++++++++++++++ - .../Target/AArch64/AArch64MCPlusBuilder.cpp | 12 +++++++ - bolt/test/AArch64/Inputs/inline-foo.c | 5 +++ - bolt/test/AArch64/Inputs/inline-main.c | 5 +++ - bolt/test/AArch64/Inputs/inlined.cpp | 23 ++++++++++++++ - bolt/test/AArch64/Inputs/inlinee.cpp | 3 ++ - bolt/test/AArch64/Inputs/jmp_opt.cpp | 7 +++++ - bolt/test/AArch64/Inputs/jmp_opt2.cpp | 3 ++ - bolt/test/AArch64/Inputs/jmp_opt3.cpp | 3 ++ - bolt/test/AArch64/inline-debug-info.test | 20 ++++++++++++ - bolt/test/AArch64/inlined-function-mixed.test | 11 +++++++ - bolt/test/AArch64/jmp-optimization.test | 14 +++++++++ - 12 files changed, 137 insertions(+) - create mode 100644 bolt/test/AArch64/Inputs/inline-foo.c - create mode 100644 bolt/test/AArch64/Inputs/inline-main.c - create mode 100644 bolt/test/AArch64/Inputs/inlined.cpp - create mode 100644 bolt/test/AArch64/Inputs/inlinee.cpp - create mode 100644 bolt/test/AArch64/Inputs/jmp_opt.cpp - create mode 100644 bolt/test/AArch64/Inputs/jmp_opt2.cpp - create mode 100644 bolt/test/AArch64/Inputs/jmp_opt3.cpp - create mode 100644 bolt/test/AArch64/inline-debug-info.test - create mode 100644 bolt/test/AArch64/inlined-function-mixed.test - create mode 100644 bolt/test/AArch64/jmp-optimization.test - -diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp -index 04232bd3e..d009d59dc 100644 ---- a/bolt/lib/Passes/Inliner.cpp -+++ b/bolt/lib/Passes/Inliner.cpp -@@ -464,6 +464,37 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) { - << ". Size change: " << SizeAfterInlining - << " bytes.\n"); - -+// Skip situations where some A64 instructions can't be inlined: -+// # Indirect branch, e.g., BR. -+// # Branch instructions but used to make a function call. -+ if (BC.isAArch64()) { -+ auto &MIB = *BC.MIB; -+ bool skip = false; -+ for (const BinaryBasicBlock &BB : *TargetFunction) { -+ for (MCInst Inst : BB) { -+ if (MIB.isPseudo(Inst)) -+ continue; -+ -+ MIB.stripAnnotations(Inst, false); -+ -+ if (MIB.isBranch(Inst)) { -+ const BinaryBasicBlock *TargetBB = -+ TargetFunction->getBasicBlockForLabel(MIB.getTargetSymbol(Inst)); -+ if (MIB.isIndirectBranch(Inst) || !TargetBB) { -+ skip = true; -+ break; -+ } -+ } -+ } -+ if (skip) -+ break; -+ } -+ if (skip) { -+ ++InstIt; -+ continue; -+ } -+ } -+ - std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction); - - DidInlining = true; -diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp -index c736196a8..03b1b536f 100644 ---- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp -+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp -@@ -34,6 +34,16 @@ public: - const MCRegisterInfo *RegInfo) - : MCPlusBuilder(Analysis, Info, RegInfo) {} - -+ MCPhysReg getStackPointer() const override { return AArch64::SP; } -+ -+ bool createCall(MCInst &Inst, const MCSymbol *Target, -+ MCContext *Ctx) override { -+ Inst.setOpcode(AArch64::BL); -+ Inst.addOperand(MCOperand::createExpr( -+ MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx))); -+ return true; -+ } -+ - bool equals(const MCTargetExpr &A, const MCTargetExpr &B, - CompFuncTy Comp) const override { - const auto &AArch64ExprA = cast(A); -@@ -1103,6 +1113,8 @@ public: - - bool isLeave(const MCInst &Inst) const override { return false; } - -+ bool isPush(const MCInst &Inst) const override { return false; } -+ - bool isPop(const MCInst &Inst) const override { return false; } - - bool isPrefix(const MCInst &Inst) const override { return false; } -diff --git a/bolt/test/AArch64/Inputs/inline-foo.c b/bolt/test/AArch64/Inputs/inline-foo.c -new file mode 100644 -index 000000000..1307c13f2 ---- /dev/null -+++ b/bolt/test/AArch64/Inputs/inline-foo.c -@@ -0,0 +1,5 @@ -+#include "stub.h" -+ -+void foo() { -+ puts("Hello world!\n"); -+} -diff --git a/bolt/test/AArch64/Inputs/inline-main.c b/bolt/test/AArch64/Inputs/inline-main.c -new file mode 100644 -index 000000000..7853d2b63 ---- /dev/null -+++ b/bolt/test/AArch64/Inputs/inline-main.c -@@ -0,0 +1,5 @@ -+extern void foo(); -+int main() { -+ foo(); -+ return 0; -+} -diff --git a/bolt/test/AArch64/Inputs/inlined.cpp b/bolt/test/AArch64/Inputs/inlined.cpp -new file mode 100644 -index 000000000..a6ff9e262 ---- /dev/null -+++ b/bolt/test/AArch64/Inputs/inlined.cpp -@@ -0,0 +1,23 @@ -+extern "C" int printf(const char*, ...); -+extern const char* question(); -+ -+inline int answer() __attribute__((always_inline)); -+inline int answer() { return 42; } -+ -+int main(int argc, char *argv[]) { -+ int ans; -+ if (argc == 1) { -+ ans = 0; -+ } else { -+ ans = argc; -+ } -+ printf("%s\n", question()); -+ for (int i = 0; i < 10; ++i) { -+ int x = answer(); -+ int y = answer(); -+ ans += x - y; -+ } -+ // padding to make sure question() is inlineable -+ asm("nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;"); -+ return ans; -+} -diff --git a/bolt/test/AArch64/Inputs/inlinee.cpp b/bolt/test/AArch64/Inputs/inlinee.cpp -new file mode 100644 -index 000000000..edb7ab145 ---- /dev/null -+++ b/bolt/test/AArch64/Inputs/inlinee.cpp -@@ -0,0 +1,3 @@ -+const char* question() { -+ return "What do you get if you multiply six by nine?"; -+} -diff --git a/bolt/test/AArch64/Inputs/jmp_opt.cpp b/bolt/test/AArch64/Inputs/jmp_opt.cpp -new file mode 100644 -index 000000000..cd6d53c35 ---- /dev/null -+++ b/bolt/test/AArch64/Inputs/jmp_opt.cpp -@@ -0,0 +1,7 @@ -+int g(); -+ -+int main() { -+ int x = g(); -+ int y = x*x; -+ return y; -+} -diff --git a/bolt/test/AArch64/Inputs/jmp_opt2.cpp b/bolt/test/AArch64/Inputs/jmp_opt2.cpp -new file mode 100644 -index 000000000..80b853d63 ---- /dev/null -+++ b/bolt/test/AArch64/Inputs/jmp_opt2.cpp -@@ -0,0 +1,3 @@ -+int f() { -+ return 0; -+} -diff --git a/bolt/test/AArch64/Inputs/jmp_opt3.cpp b/bolt/test/AArch64/Inputs/jmp_opt3.cpp -new file mode 100644 -index 000000000..7fb551163 ---- /dev/null -+++ b/bolt/test/AArch64/Inputs/jmp_opt3.cpp -@@ -0,0 +1,3 @@ -+int f(); -+ -+int g() { return f(); } -diff --git a/bolt/test/AArch64/inline-debug-info.test b/bolt/test/AArch64/inline-debug-info.test -new file mode 100644 -index 000000000..e20e5e31e ---- /dev/null -+++ b/bolt/test/AArch64/inline-debug-info.test -@@ -0,0 +1,20 @@ -+## Check that BOLT correctly prints and updates debug info for inlined -+## functions. -+ -+# REQUIRES: system-linux -+ -+# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \ -+# RUN: -I%p/../Inputs -o %t.exe -Wl,-q -+# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \ -+# RUN: --print-only=main --print-after-lowering --force-inline=foo \ -+# RUN: -o %t.bolt \ -+# RUN: | FileCheck %s -+ -+## The call to puts() should come from inline-foo.c: -+# CHECK: callq {{.*}} # debug line {{.*}}inline-foo.c:4:3 -+ -+# RUN: llvm-objdump --disassemble-symbols=main -d --line-numbers %t.bolt \ -+# RUN: | FileCheck %s -check-prefix=CHECK-OBJDUMP -+ -+## Dump of main() should include debug info from inline-foo.c after inlining: -+# CHECK-OBJDUMP: inline-foo.c:4 -diff --git a/bolt/test/AArch64/inlined-function-mixed.test b/bolt/test/AArch64/inlined-function-mixed.test -new file mode 100644 -index 000000000..5a87bdde9 ---- /dev/null -+++ b/bolt/test/AArch64/inlined-function-mixed.test -@@ -0,0 +1,11 @@ -+# Make sure inlining from a unit with debug info into unit without -+# debug info does not cause a crash. -+ -+RUN: %clangxx %cxxflags %S/Inputs/inlined.cpp -c -o %T/inlined.o -+RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g -+RUN: %clangxx %cxxflags %T/inlined.o %T/inlinee.o -o %t -+ -+RUN: llvm-bolt %t -o %t.bolt --update-debug-sections --reorder-blocks=reverse \ -+RUN: --inline-small-functions --force-inline=main | FileCheck %s -+ -+CHECK-NOT: BOLT: 0 out of {{.*}} functions were overwritten -diff --git a/bolt/test/AArch64/jmp-optimization.test b/bolt/test/AArch64/jmp-optimization.test -new file mode 100644 -index 000000000..92f4b9a14 ---- /dev/null -+++ b/bolt/test/AArch64/jmp-optimization.test -@@ -0,0 +1,14 @@ -+# Tests the optimization of functions that just do a tail call in the beginning. -+ -+# This test has commands that rely on shell capabilities that won't execute -+# correctly on Windows e.g. unsupported parameter expansion -+REQUIRES: shell -+ -+RUN: %clang %cflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t -+RUN: llvm-bolt -inline-small-functions %t -o %t.bolt -+RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s -+ -+CHECK:
: -+CHECK-NOT: call -+CHECK: xorl %eax, %eax -+CHECK: retq --- -2.33.0 - +From a09ea2c3534d12f194f740180e09a229e0b2200f Mon Sep 17 00:00:00 2001 +From: xiongzhou4 +Date: Wed, 12 Jun 2024 17:12:36 +0800 +Subject: [PATCH 1/2] [AArch64] Add AArch64 support for inline. + +--- + bolt/include/bolt/Core/MCPlusBuilder.h | 5 +-- + bolt/lib/Passes/Inliner.cpp | 31 +++++++++++++++++++ + .../Target/AArch64/AArch64MCPlusBuilder.cpp | 10 ++++++ + bolt/test/AArch64/Inputs/inline-foo.c | 5 +++ + bolt/test/AArch64/Inputs/inline-main.c | 5 +++ + bolt/test/AArch64/Inputs/inlined.cpp | 23 ++++++++++++++ + bolt/test/AArch64/Inputs/inlinee.cpp | 3 ++ + bolt/test/AArch64/Inputs/jmp_opt.cpp | 7 +++++ + bolt/test/AArch64/Inputs/jmp_opt2.cpp | 3 ++ + bolt/test/AArch64/Inputs/jmp_opt3.cpp | 3 ++ + bolt/test/AArch64/inline-debug-info.test | 20 ++++++++++++ + bolt/test/AArch64/inlined-function-mixed.test | 11 +++++++ + bolt/test/AArch64/jmp-optimization.test | 14 +++++++++ + 13 files changed, 136 insertions(+), 4 deletions(-) + create mode 100644 bolt/test/AArch64/Inputs/inline-foo.c + create mode 100644 bolt/test/AArch64/Inputs/inline-main.c + create mode 100644 bolt/test/AArch64/Inputs/inlined.cpp + create mode 100644 bolt/test/AArch64/Inputs/inlinee.cpp + create mode 100644 bolt/test/AArch64/Inputs/jmp_opt.cpp + create mode 100644 bolt/test/AArch64/Inputs/jmp_opt2.cpp + create mode 100644 bolt/test/AArch64/Inputs/jmp_opt3.cpp + create mode 100644 bolt/test/AArch64/inline-debug-info.test + create mode 100644 bolt/test/AArch64/inlined-function-mixed.test + create mode 100644 bolt/test/AArch64/jmp-optimization.test + +diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h +index db3f7e7f1..56d0228cd 100644 +--- a/bolt/include/bolt/Core/MCPlusBuilder.h ++++ b/bolt/include/bolt/Core/MCPlusBuilder.h +@@ -573,10 +573,7 @@ public: + return 0; + } + +- virtual bool isPush(const MCInst &Inst) const { +- llvm_unreachable("not implemented"); +- return false; +- } ++ virtual bool isPush(const MCInst &Inst) const { return false; } + + /// Return the width, in bytes, of the memory access performed by \p Inst, if + /// this is a push instruction. Return zero otherwise. +diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp +index 8dcb8934f..67dd294fb 100644 +--- a/bolt/lib/Passes/Inliner.cpp ++++ b/bolt/lib/Passes/Inliner.cpp +@@ -465,6 +465,37 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) { + << ". Size change: " << SizeAfterInlining + << " bytes.\n"); + ++// Skip situations where some A64 instructions can't be inlined: ++// # Indirect branch, e.g., BR. ++// # Branch instructions but used to make a function call. ++ if (BC.isAArch64()) { ++ auto &MIB = *BC.MIB; ++ bool skip = false; ++ for (const BinaryBasicBlock &BB : *TargetFunction) { ++ for (MCInst Inst : BB) { ++ if (MIB.isPseudo(Inst)) ++ continue; ++ ++ MIB.stripAnnotations(Inst, false); ++ ++ if (MIB.isBranch(Inst)) { ++ const BinaryBasicBlock *TargetBB = ++ TargetFunction->getBasicBlockForLabel(MIB.getTargetSymbol(Inst)); ++ if (MIB.isIndirectBranch(Inst) || !TargetBB) { ++ skip = true; ++ break; ++ } ++ } ++ } ++ if (skip) ++ break; ++ } ++ if (skip) { ++ ++InstIt; ++ continue; ++ } ++ } ++ + std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction); + + DidInlining = true; +diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +index d109a5d52..acf21ba23 100644 +--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp ++++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +@@ -34,6 +34,8 @@ public: + const MCRegisterInfo *RegInfo) + : MCPlusBuilder(Analysis, Info, RegInfo) {} + ++ MCPhysReg getStackPointer() const override { return AArch64::SP; } ++ + bool equals(const MCTargetExpr &A, const MCTargetExpr &B, + CompFuncTy Comp) const override { + const auto &AArch64ExprA = cast(A); +@@ -816,6 +818,14 @@ public: + + int getUncondBranchEncodingSize() const override { return 28; } + ++ bool createCall(MCInst &Inst, const MCSymbol *Target, ++ MCContext *Ctx) override { ++ Inst.setOpcode(AArch64::BL); ++ Inst.addOperand(MCOperand::createExpr( ++ MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx))); ++ return true; ++ } ++ + bool createTailCall(MCInst &Inst, const MCSymbol *Target, + MCContext *Ctx) override { + Inst.setOpcode(AArch64::B); +diff --git a/bolt/test/AArch64/Inputs/inline-foo.c b/bolt/test/AArch64/Inputs/inline-foo.c +new file mode 100644 +index 000000000..1307c13f2 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/inline-foo.c +@@ -0,0 +1,5 @@ ++#include "stub.h" ++ ++void foo() { ++ puts("Hello world!\n"); ++} +diff --git a/bolt/test/AArch64/Inputs/inline-main.c b/bolt/test/AArch64/Inputs/inline-main.c +new file mode 100644 +index 000000000..7853d2b63 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/inline-main.c +@@ -0,0 +1,5 @@ ++extern void foo(); ++int main() { ++ foo(); ++ return 0; ++} +diff --git a/bolt/test/AArch64/Inputs/inlined.cpp b/bolt/test/AArch64/Inputs/inlined.cpp +new file mode 100644 +index 000000000..a6ff9e262 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/inlined.cpp +@@ -0,0 +1,23 @@ ++extern "C" int printf(const char*, ...); ++extern const char* question(); ++ ++inline int answer() __attribute__((always_inline)); ++inline int answer() { return 42; } ++ ++int main(int argc, char *argv[]) { ++ int ans; ++ if (argc == 1) { ++ ans = 0; ++ } else { ++ ans = argc; ++ } ++ printf("%s\n", question()); ++ for (int i = 0; i < 10; ++i) { ++ int x = answer(); ++ int y = answer(); ++ ans += x - y; ++ } ++ // padding to make sure question() is inlineable ++ asm("nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;"); ++ return ans; ++} +diff --git a/bolt/test/AArch64/Inputs/inlinee.cpp b/bolt/test/AArch64/Inputs/inlinee.cpp +new file mode 100644 +index 000000000..edb7ab145 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/inlinee.cpp +@@ -0,0 +1,3 @@ ++const char* question() { ++ return "What do you get if you multiply six by nine?"; ++} +diff --git a/bolt/test/AArch64/Inputs/jmp_opt.cpp b/bolt/test/AArch64/Inputs/jmp_opt.cpp +new file mode 100644 +index 000000000..cd6d53c35 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/jmp_opt.cpp +@@ -0,0 +1,7 @@ ++int g(); ++ ++int main() { ++ int x = g(); ++ int y = x*x; ++ return y; ++} +diff --git a/bolt/test/AArch64/Inputs/jmp_opt2.cpp b/bolt/test/AArch64/Inputs/jmp_opt2.cpp +new file mode 100644 +index 000000000..80b853d63 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/jmp_opt2.cpp +@@ -0,0 +1,3 @@ ++int f() { ++ return 0; ++} +diff --git a/bolt/test/AArch64/Inputs/jmp_opt3.cpp b/bolt/test/AArch64/Inputs/jmp_opt3.cpp +new file mode 100644 +index 000000000..7fb551163 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/jmp_opt3.cpp +@@ -0,0 +1,3 @@ ++int f(); ++ ++int g() { return f(); } +diff --git a/bolt/test/AArch64/inline-debug-info.test b/bolt/test/AArch64/inline-debug-info.test +new file mode 100644 +index 000000000..e20e5e31e +--- /dev/null ++++ b/bolt/test/AArch64/inline-debug-info.test +@@ -0,0 +1,20 @@ ++## Check that BOLT correctly prints and updates debug info for inlined ++## functions. ++ ++# REQUIRES: system-linux ++ ++# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \ ++# RUN: -I%p/../Inputs -o %t.exe -Wl,-q ++# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \ ++# RUN: --print-only=main --print-after-lowering --force-inline=foo \ ++# RUN: -o %t.bolt \ ++# RUN: | FileCheck %s ++ ++## The call to puts() should come from inline-foo.c: ++# CHECK: callq {{.*}} # debug line {{.*}}inline-foo.c:4:3 ++ ++# RUN: llvm-objdump --disassemble-symbols=main -d --line-numbers %t.bolt \ ++# RUN: | FileCheck %s -check-prefix=CHECK-OBJDUMP ++ ++## Dump of main() should include debug info from inline-foo.c after inlining: ++# CHECK-OBJDUMP: inline-foo.c:4 +diff --git a/bolt/test/AArch64/inlined-function-mixed.test b/bolt/test/AArch64/inlined-function-mixed.test +new file mode 100644 +index 000000000..5a87bdde9 +--- /dev/null ++++ b/bolt/test/AArch64/inlined-function-mixed.test +@@ -0,0 +1,11 @@ ++# Make sure inlining from a unit with debug info into unit without ++# debug info does not cause a crash. ++ ++RUN: %clangxx %cxxflags %S/Inputs/inlined.cpp -c -o %T/inlined.o ++RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g ++RUN: %clangxx %cxxflags %T/inlined.o %T/inlinee.o -o %t ++ ++RUN: llvm-bolt %t -o %t.bolt --update-debug-sections --reorder-blocks=reverse \ ++RUN: --inline-small-functions --force-inline=main | FileCheck %s ++ ++CHECK-NOT: BOLT: 0 out of {{.*}} functions were overwritten +diff --git a/bolt/test/AArch64/jmp-optimization.test b/bolt/test/AArch64/jmp-optimization.test +new file mode 100644 +index 000000000..92f4b9a14 +--- /dev/null ++++ b/bolt/test/AArch64/jmp-optimization.test +@@ -0,0 +1,14 @@ ++# Tests the optimization of functions that just do a tail call in the beginning. ++ ++# This test has commands that rely on shell capabilities that won't execute ++# correctly on Windows e.g. unsupported parameter expansion ++REQUIRES: shell ++ ++RUN: %clang %cflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t ++RUN: llvm-bolt -inline-small-functions %t -o %t.bolt ++RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s ++ ++CHECK:
: ++CHECK-NOT: call ++CHECK: xorl %eax, %eax ++CHECK: retq +-- +2.33.0 + diff --git a/0004-Bolt-Solving-pie-support-issue.patch b/0004-Bolt-Solving-pie-support-issue.patch new file mode 100644 index 0000000000000000000000000000000000000000..b26d9fcdf1eb2fbfc4f02094a06ffcf3ea1fae2c --- /dev/null +++ b/0004-Bolt-Solving-pie-support-issue.patch @@ -0,0 +1,170 @@ +From a28084a4adff2340dd02c2c0c42f4997f76b3ffa Mon Sep 17 00:00:00 2001 +From: rfwang07 +Date: Fri, 21 Jun 2024 11:16:44 +0800 +Subject: [PATCH] [Bolt] Solving pie support issue + +--- + bolt/lib/Core/BinaryContext.cpp | 25 +++++++++++++++++++---- + bolt/test/perf2bolt/Inputs/perf_test.c | 26 ++++++++++++++++++++++++ + bolt/test/perf2bolt/Inputs/perf_test.lds | 13 ++++++++++++ + bolt/test/perf2bolt/lit.local.cfg | 4 ++++ + bolt/test/perf2bolt/perf_test.test | 17 ++++++++++++++++ + bolt/unittests/Core/BinaryContext.cpp | 21 +++++++++++++++++++ + 6 files changed, 102 insertions(+), 4 deletions(-) + create mode 100644 bolt/test/perf2bolt/Inputs/perf_test.c + create mode 100644 bolt/test/perf2bolt/Inputs/perf_test.lds + create mode 100644 bolt/test/perf2bolt/lit.local.cfg + create mode 100644 bolt/test/perf2bolt/perf_test.test + +diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp +index 2d2b35ee2..ab9f0b844 100644 +--- a/bolt/lib/Core/BinaryContext.cpp ++++ b/bolt/lib/Core/BinaryContext.cpp +@@ -1880,10 +1880,27 @@ BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, + // Find a segment with a matching file offset. + for (auto &KV : SegmentMapInfo) { + const SegmentInfo &SegInfo = KV.second; +- if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) { +- // Use segment's aligned memory offset to calculate the base address. +- const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment); +- return MMapAddress - MemOffset; ++ // FileOffset is got from perf event, ++ // and it is equal to alignDown(SegInfo.FileOffset, pagesize). ++ // If the pagesize is not equal to SegInfo.Alignment. ++ // FileOffset and SegInfo.FileOffset should be aligned first, ++ // and then judge whether they are equal. ++ if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == ++ alignDown(FileOffset, SegInfo.Alignment)) { ++ // The function's offset from base address in VAS is aligned by pagesize ++ // instead of SegInfo.Alignment. Pagesize can't be got from perf events. ++ // However, The ELF document says that SegInfo.FileOffset should equal ++ // to SegInfo.Address, modulo the pagesize. ++ // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf ++ ++ // So alignDown(SegInfo.Address, pagesize) can be calculated by: ++ // alignDown(SegInfo.Address, pagesize) ++ // = SegInfo.Address - (SegInfo.Address % pagesize) ++ // = SegInfo.Address - (SegInfo.FileOffset % pagesize) ++ // = SegInfo.Address - SegInfo.FileOffset + ++ // alignDown(SegInfo.FileOffset, pagesize) ++ // = SegInfo.Address - SegInfo.FileOffset + FileOffset ++ return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset); + } + } + +diff --git a/bolt/test/perf2bolt/Inputs/perf_test.c b/bolt/test/perf2bolt/Inputs/perf_test.c +new file mode 100644 +index 000000000..ff5ecf7a8 +--- /dev/null ++++ b/bolt/test/perf2bolt/Inputs/perf_test.c +@@ -0,0 +1,26 @@ ++#include ++#include ++#include ++ ++int add(int a, int b) { return a + b; } ++int minus(int a, int b) { return a - b; } ++int multiple(int a, int b) { return a * b; } ++int divide(int a, int b) { ++ if (b == 0) ++ return 0; ++ return a / b; ++} ++ ++int main() { ++ int a = 16; ++ int b = 8; ++ ++ for (int i = 1; i < 100000; i++) { ++ add(a, b); ++ minus(a, b); ++ multiple(a, b); ++ divide(a, b); ++ } ++ ++ return 0; ++} +diff --git a/bolt/test/perf2bolt/Inputs/perf_test.lds b/bolt/test/perf2bolt/Inputs/perf_test.lds +new file mode 100644 +index 000000000..9cb4ebbf1 +--- /dev/null ++++ b/bolt/test/perf2bolt/Inputs/perf_test.lds +@@ -0,0 +1,13 @@ ++SECTIONS { ++ . = SIZEOF_HEADERS; ++ .interp : { *(.interp) } ++ .note.gnu.build-id : { *(.note.gnu.build-id) } ++ . = 0x212e8; ++ .dynsym : { *(.dynsym) } ++ . = 0x31860; ++ .text : { *(.text*) } ++ . = 0x41c20; ++ .fini_array : { *(.fini_array) } ++ . = 0x54e18; ++ .data : { *(.data) } ++} +diff --git a/bolt/test/perf2bolt/lit.local.cfg b/bolt/test/perf2bolt/lit.local.cfg +new file mode 100644 +index 000000000..87a96ec34 +--- /dev/null ++++ b/bolt/test/perf2bolt/lit.local.cfg +@@ -0,0 +1,4 @@ ++import shutil ++ ++if shutil.which("perf") != None: ++ config.available_features.add("perf") +diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test +new file mode 100644 +index 000000000..fe6e015ab +--- /dev/null ++++ b/bolt/test/perf2bolt/perf_test.test +@@ -0,0 +1,17 @@ ++# Check perf2bolt binary function which was compiled with pie ++ ++REQUIRES: system-linux, perf ++ ++RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -Wl,--script=%S/Inputs/perf_test.lds -o %t ++RUN: perf record -e cycles:u -o %t2 -- %t ++RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s ++ ++CHECK-NOT: PERF2BOLT-ERROR ++CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection. ++ ++RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4 ++RUN: perf record -e cycles:u -o %t5 -- %t4 ++RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id 2>&1 | FileCheck %s --check-prefix=CHECK-NO-PIE ++ ++CHECK-NO-PIE-NOT: PERF2BOLT-ERROR ++CHECK-NO-PIE-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection. +diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp +index bac264141..5a80cb4a2 100644 +--- a/bolt/unittests/Core/BinaryContext.cpp ++++ b/bolt/unittests/Core/BinaryContext.cpp +@@ -83,3 +83,24 @@ TEST_P(BinaryContextTester, BaseAddress) { + BaseAddress = BC->getBaseAddressForMapping(0x7f13f5556000, 0x137a000); + ASSERT_FALSE(BaseAddress.has_value()); + } ++ ++TEST_P(BinaryContextTester, BaseAddress2) { ++ // Check that base address calculation is correct for a binary if the ++ // alignment in ELF file are different from pagesize. ++ // The segment layout is as follows: ++ BC->SegmentMapInfo[0] = SegmentInfo{0, 0x2177c, 0, 0x2177c, 0x10000}; ++ BC->SegmentMapInfo[0x31860] = ++ SegmentInfo{0x31860, 0x370, 0x21860, 0x370, 0x10000}; ++ BC->SegmentMapInfo[0x41c20] = ++ SegmentInfo{0x41c20, 0x1f8, 0x21c20, 0x1f8, 0x10000}; ++ BC->SegmentMapInfo[0x54e18] = ++ SegmentInfo{0x54e18, 0x51, 0x24e18, 0x51, 0x10000}; ++ ++ std::optional BaseAddress = ++ BC->getBaseAddressForMapping(0xaaaaea444000, 0x21000); ++ ASSERT_TRUE(BaseAddress.has_value()); ++ ASSERT_EQ(*BaseAddress, 0xaaaaea413000ULL); ++ ++ BaseAddress = BC->getBaseAddressForMapping(0xaaaaea444000, 0x11000); ++ ASSERT_FALSE(BaseAddress.has_value()); ++} +-- +2.39.2 (Apple Git-143) + diff --git a/0005-Add-block-correction-optimization.patch b/0005-Add-block-correction-optimization.patch deleted file mode 100644 index 32a861e5368baabe0ddd229a4bbf0ecffcc21057..0000000000000000000000000000000000000000 --- a/0005-Add-block-correction-optimization.patch +++ /dev/null @@ -1,2325 +0,0 @@ -From f2a5570a0821739a9e674e989ad1f734cc159570 Mon Sep 17 00:00:00 2001 -From: h00502206 -Date: Tue, 4 Jun 2024 21:52:50 +0800 -Subject: [PATCH] Add block correction optimization - ---- - bolt/CMakeLists.txt | 3 + - .../bolt/include/bolt/Core/BinaryBasicBlock.h | 34 +- - .../bolt/Core/BinaryBasicBlockFeature.h | 287 +++++ - .../bolt/include/bolt/Passes/FeatureMiner.h | 36 +- - .../include/bolt/Passes/StaticBranchInfo.h | 9 +- - .../bolt/include/bolt/Profile/DataReader.h | 74 +- - .../bolt/lib/Core/BinaryBasicBlockFeature.cpp | 25 + - bolt/lib/Core/CMakeLists.txt | 1 + - bolt/lib/Passes/CMakeLists.txt | 2 +- - bolt/lib/Passes/FeatureMiner.cpp | 1137 +++++------------ - .../bolt/lib/Passes/StaticBranchInfo.cpp | 6 +- - bolt/lib/Profile/CMakeLists.txt | 1 + - bolt/lib/Profile/DataReader.cpp | 126 +- - .../bolt/lib/Rewrite/RewriteInstance.cpp | 7 + - 14 files changed, 885 insertions(+), 863 deletions(-) - create mode 100644 bolt/include/bolt/Core/BinaryBasicBlockFeature.h - create mode 100644 bolt/lib/Core/BinaryBasicBlockFeature.cpp - -diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt -index 3de930496..09dabcdee 100644 ---- a/bolt/CMakeLists.txt -+++ b/bolt/CMakeLists.txt -@@ -9,6 +9,9 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|aarch64") - set(BOLT_ENABLE_RUNTIME ON) - endif() - -+ -+add_compile_options(-fexceptions) -+ - set(BOLT_CLANG_EXE "" CACHE FILEPATH "Path to clang executable for the target \ - architecture for use in BOLT tests") - set(BOLT_LLD_EXE "" CACHE FILEPATH "Path to lld executable for the target \ -diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h -index 0a82f467e..bfeb174de 100644 ---- a/bolt/include/bolt/Core/BinaryBasicBlock.h -+++ b/bolt/include/bolt/Core/BinaryBasicBlock.h -@@ -15,6 +15,7 @@ - #ifndef BOLT_CORE_BINARY_BASIC_BLOCK_H - #define BOLT_CORE_BINARY_BASIC_BLOCK_H - -+#include "bolt/Core/BinaryBasicBlockFeature.h" - #include "bolt/Core/FunctionLayout.h" - #include "bolt/Core/MCPlus.h" - #include "llvm/ADT/GraphTraits.h" -@@ -25,6 +26,7 @@ - #include "llvm/Support/raw_ostream.h" - #include - #include -+#include - - namespace llvm { - class MCCodeEmitter; -@@ -62,6 +64,13 @@ public: - - using BranchInfoType = SmallVector; - -+ std::set ChildrenSet; -+ -+ std::set ParentSet; -+ -+ BinaryBasicBlockFeature block_features; -+ -+ - private: - /// Vector of all instructions in the block. - InstructionListType Instructions; -@@ -384,9 +393,11 @@ public: - /// corresponding to a jump condition which could be true or false. - /// Return nullptr if the basic block does not have a conditional jump. - BinaryBasicBlock *getConditionalSuccessor(bool Condition) { -- if (succ_size() != 2) -- return nullptr; -- return Successors[Condition == true ? 0 : 1]; -+ if (succ_size() == 2) -+ return Successors[Condition == true ? 0 : 1]; -+ if (succ_size() == 1) -+ return Successors[0]; -+ return nullptr; - } - - const BinaryBasicBlock *getConditionalSuccessor(bool Condition) const { -@@ -407,6 +418,11 @@ public: - return const_cast(this)->getFallthrough(); - } - -+ /// Return branch info corresponding to only branch. -+ const BinaryBranchInfo &getOnlyBranchInfo() const { -+ return BranchInfo[0]; -+ }; -+ - /// Return branch info corresponding to a taken branch. - const BinaryBranchInfo &getTakenBranchInfo() const { - assert(BranchInfo.size() == 2 && -@@ -807,6 +823,16 @@ public: - OutputAddressRange.second = Address; - } - -+ /// Sets features of this BB. -+ void setFeatures(BinaryBasicBlockFeature BBF) { -+ block_features = BBF; -+ } -+ -+ /// Gets numberic features of this BB. -+ BinaryBasicBlockFeature getFeatures() { -+ return block_features; -+ } -+ - /// Gets the memory address range of this BB in the input binary. - std::pair getInputAddressRange() const { - return InputRange; -@@ -974,7 +1000,7 @@ private: - #if defined(LLVM_ON_UNIX) - /// Keep the size of the BinaryBasicBlock within a reasonable size class - /// (jemalloc bucket) on Linux --static_assert(sizeof(BinaryBasicBlock) <= 256, ""); -+static_assert(sizeof(BinaryBasicBlock) <= 2048, ""); - #endif - - bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS); -diff --git a/bolt/include/bolt/Core/BinaryBasicBlockFeature.h b/bolt/include/bolt/Core/BinaryBasicBlockFeature.h -new file mode 100644 -index 000000000..0f123bd49 ---- /dev/null -+++ b/bolt/include/bolt/Core/BinaryBasicBlockFeature.h -@@ -0,0 +1,287 @@ -+//===- bolt/Core/BinaryBasicBlockFeature.h - Low-level basic block -----*- C++ -*-===// -+// -+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -+// See https://llvm.org/LICENSE.txt for license information. -+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+// -+//===----------------------------------------------------------------------===// -+// -+// Features of BinaryBasicBlock -+// -+//===----------------------------------------------------------------------===// -+ -+#ifndef BOLT_CORE_BINARY_BASIC_BLOCK_FEATURE_H -+#define BOLT_CORE_BINARY_BASIC_BLOCK_FEATURE_H -+ -+#include "bolt/Core/FunctionLayout.h" -+#include "bolt/Core/MCPlus.h" -+#include "llvm/ADT/GraphTraits.h" -+#include "llvm/ADT/StringRef.h" -+#include "llvm/MC/MCInst.h" -+#include "llvm/MC/MCSymbol.h" -+#include "llvm/Support/ErrorOr.h" -+#include "llvm/Support/raw_ostream.h" -+#include -+#include -+ -+namespace llvm { -+ -+namespace bolt { -+ -+class BinaryBasicBlockFeature { -+ -+public: -+ -+ int32_t Opcode; -+ -+ int16_t Direction; -+ -+ int32_t CmpOpcode; -+ -+ int16_t LoopHeader; -+ -+ int16_t ProcedureType; -+ -+ int64_t Count; -+ -+ int64_t FallthroughCount; -+ -+ int64_t TotalLoops; -+ -+ int64_t LoopDepth; -+ -+ int64_t LoopNumBlocks; -+ -+ int64_t LocalExitingBlock; -+ -+ int64_t LocalLatchBlock; -+ -+ int64_t LocalLoopHeader; -+ -+ int64_t Call; -+ -+ int64_t DeltaTaken; -+ -+ int64_t NumLoads; -+ -+ int64_t NumCalls; -+ -+ int64_t OperandRAType; -+ -+ int64_t OperandRBType; -+ -+ int64_t BasicBlockSize; -+ -+ int64_t NumBasicBlocks; -+ -+ int64_t HasIndirectCalls; -+ -+ std::vector EndOpcode_vec; -+ -+ std::vector LoopHeader_vec; -+ -+ std::vector Backedge_vec; -+ -+ std::vector Exit_vec; -+ -+ std::vector Call_vec; -+ -+ std::vector BasicBlockSize_vec; -+ -+ std::vector InferenceFeatures; -+ -+ uint64_t FuncExec; -+ -+ int32_t ParentChildNum; -+ -+ int32_t ParentCount; -+ -+ int32_t ChildParentNum; -+ -+ int32_t ChildCount; -+ -+public: -+ -+ void setOpcode(const int32_t &BlockOpcode) { -+ Opcode = BlockOpcode; -+ } -+ -+ void setDirection(const int16_t &BlockDirection) { -+ Direction = BlockDirection; -+ } -+ -+ void setCmpOpcode(const int32_t &BlockCmpOpcode) { -+ CmpOpcode = BlockCmpOpcode; -+ } -+ -+ void setLoopHeader(const int16_t &BlockLoopHeader) { -+ LoopHeader = BlockLoopHeader; -+ } -+ -+ void setProcedureType(const int16_t &BlockProcedureType) { -+ ProcedureType = BlockProcedureType; -+ } -+ -+ void setCount(const int64_t &BlockCount) { -+ Count = BlockCount; -+ } -+ -+ void setFallthroughCount(const int64_t &BlockFallthroughCount) { -+ FallthroughCount = BlockFallthroughCount; -+ } -+ -+ void setTotalLoops(const int64_t &BlockTotalLoops) { -+ TotalLoops = BlockTotalLoops; -+ } -+ -+ void setLoopDepth(const int64_t &BlockLoopDepth) { -+ LoopDepth = BlockLoopDepth; -+ } -+ -+ void setLoopNumBlocks(const int64_t &BlockLoopNumBlocks) { -+ LoopNumBlocks = BlockLoopNumBlocks; -+ } -+ -+ void setLocalExitingBlock(const int64_t &BlockLocalExitingBlock) { -+ LocalExitingBlock = BlockLocalExitingBlock; -+ } -+ -+ void setLocalLatchBlock(const int64_t &BlockLocalLatchBlock) { -+ LocalLatchBlock = BlockLocalLatchBlock; -+ } -+ -+ void setLocalLoopHeader(const int64_t &BlockLocalLoopHeader) { -+ LocalLoopHeader = BlockLocalLoopHeader; -+ } -+ -+ void setDeltaTaken(const int64_t &BlockDeltaTaken) { -+ DeltaTaken = BlockDeltaTaken; -+ } -+ -+ void setNumLoads(const int64_t &BlockNumLoads) { -+ NumLoads = BlockNumLoads; -+ } -+ -+ void setNumCalls(const int64_t &BlockNumCalls) { -+ NumCalls = BlockNumCalls; -+ } -+ -+ void setOperandRAType(const int64_t &BlockOperandRAType) { -+ OperandRAType = BlockOperandRAType; -+ } -+ -+ void setOperandRBType(const int64_t &BlockOperandRBType) { -+ OperandRBType = BlockOperandRBType; -+ } -+ -+ void setBasicBlockSize(const int64_t &BlockBasicBlockSize) { -+ BasicBlockSize = BlockBasicBlockSize; -+ } -+ -+ void setNumBasicBlocks(const int64_t &BlockNumBasicBlocks) { -+ NumBasicBlocks = BlockNumBasicBlocks; -+ } -+ -+ void setHasIndirectCalls(const int64_t &BlockHasIndirectCalls) { -+ HasIndirectCalls = BlockHasIndirectCalls; -+ } -+ -+ void setEndOpcodeVec(const int32_t &EndOpcode) { -+ EndOpcode_vec.push_back(EndOpcode); -+ } -+ -+ void setLoopHeaderVec(const int16_t &LoopHeader) { -+ LoopHeader_vec.push_back(LoopHeader); -+ } -+ -+ void setBackedgeVec(const int16_t &Backedge) { -+ Backedge_vec.push_back(Backedge); -+ } -+ -+ void setExitVec(const int16_t &Exit) { -+ Exit_vec.push_back(Exit); -+ } -+ -+ void setCallVec(const int16_t &Call) { -+ Call_vec.push_back(Call); -+ } -+ -+ void setBasicBlockSizeVec(const int64_t &BasicBlockSize) { -+ BasicBlockSize_vec.push_back(BasicBlockSize); -+ } -+ -+ void setFunExec(const uint64_t &BlockFuncExec) { -+ FuncExec = BlockFuncExec; -+ } -+ -+ void setParentChildNum(const int32_t &BlockParentChildNum) { -+ ParentChildNum = BlockParentChildNum; -+ } -+ -+ void setParentCount(const int32_t &BlockParentCount) { -+ ParentCount = BlockParentCount; -+ } -+ -+ void setChildParentNum(const int32_t &BlockChildParentNum) { -+ ChildParentNum = BlockChildParentNum; -+ } -+ -+ void setChildCount(const int32_t &BlockChildCount) { -+ ChildCount = BlockChildCount; -+ } -+ -+ -+ -+ void setInferenceFeatures() { -+ -+ if (Count == -1 || FallthroughCount == -1) { -+ return; -+ } -+ if (ParentChildNum == -1 && ParentCount == -1 && ChildParentNum == -1 && ChildCount == -1){ -+ return; -+ } -+ -+ InferenceFeatures.push_back(static_cast(Direction)); -+ InferenceFeatures.push_back(static_cast(LoopHeader)); -+ InferenceFeatures.push_back(static_cast(ProcedureType)); -+ InferenceFeatures.push_back(static_cast(OperandRAType)); -+ InferenceFeatures.push_back(static_cast(OperandRBType)); -+ InferenceFeatures.push_back(static_cast(LoopHeader_vec[0])); -+ InferenceFeatures.push_back(static_cast(Backedge_vec[0])); -+ InferenceFeatures.push_back(static_cast(Exit_vec[0])); -+ InferenceFeatures.push_back(static_cast(LoopHeader_vec[1])); -+ InferenceFeatures.push_back(static_cast(Call_vec[0])); -+ InferenceFeatures.push_back(static_cast(LocalExitingBlock)); -+ InferenceFeatures.push_back(static_cast(HasIndirectCalls)); -+ InferenceFeatures.push_back(static_cast(LocalLatchBlock)); -+ InferenceFeatures.push_back(static_cast(LocalLoopHeader)); -+ InferenceFeatures.push_back(static_cast(Opcode)); -+ InferenceFeatures.push_back(static_cast(CmpOpcode)); -+ InferenceFeatures.push_back(static_cast(EndOpcode_vec[0])); -+ InferenceFeatures.push_back(static_cast(EndOpcode_vec[1])); -+ InferenceFeatures.push_back(static_cast(FuncExec)); -+ InferenceFeatures.push_back(static_cast(NumBasicBlocks)); -+ InferenceFeatures.push_back(static_cast(BasicBlockSize)); -+ InferenceFeatures.push_back(static_cast(BasicBlockSize_vec[0])); -+ InferenceFeatures.push_back(static_cast(BasicBlockSize_vec[1])); -+ InferenceFeatures.push_back(static_cast(LoopNumBlocks)); -+ InferenceFeatures.push_back(static_cast(NumLoads)); -+ InferenceFeatures.push_back(static_cast(NumCalls)); -+ InferenceFeatures.push_back(static_cast(TotalLoops)); -+ InferenceFeatures.push_back(static_cast(DeltaTaken)); -+ InferenceFeatures.push_back(static_cast(LoopDepth)); -+ InferenceFeatures.push_back(static_cast(ParentChildNum)); -+ InferenceFeatures.push_back(static_cast(ParentCount)); -+ InferenceFeatures.push_back(static_cast(ChildParentNum)); -+ InferenceFeatures.push_back(static_cast(ChildCount)); -+ } -+ -+ std::vector getInferenceFeatures() { -+ return InferenceFeatures; -+ } -+ -+}; -+} -+} -+ -+#endif -\ No newline at end of file -diff --git a/bolt/include/bolt/Passes/FeatureMiner.h b/bolt/include/bolt/Passes/FeatureMiner.h -index 916e5515d..b03666ebf 100644 ---- a/bolt/include/bolt/Passes/FeatureMiner.h -+++ b/bolt/include/bolt/Passes/FeatureMiner.h -@@ -14,19 +14,12 @@ - #ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ - #define LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ - --// #include "BinaryContext.h" --// #include "BinaryFunction.h" --// #include "BinaryLoop.h" --// #include "DominatorAnalysis.h" --// #include "Passes/BinaryPasses.h" --// #include "Passes/StaticBranchInfo.h" - #include "bolt/Core/BinaryData.h" - #include "bolt/Core/BinaryFunction.h" - #include "bolt/Core/BinaryLoop.h" - #include "bolt/Passes/DominatorAnalysis.h" - #include "bolt/Passes/BinaryPasses.h" - #include "bolt/Passes/StaticBranchInfo.h" -- - #include "llvm/ADT/DenseMap.h" - #include "llvm/ADT/Optional.h" - #include "llvm/ADT/StringRef.h" -@@ -36,6 +29,7 @@ - #include - #include - #include -+#include - - namespace llvm { - namespace bolt { -@@ -43,7 +37,6 @@ namespace bolt { - class FeatureMiner : public BinaryFunctionPass { - private: - std::unique_ptr SBI; -- - /// BasicBlockInfo - This structure holds feature information about the target - /// BasicBlock of either the taken or the fallthrough paths of a given branch. - struct BasicBlockInfo { -@@ -57,7 +50,7 @@ private: - Optional NumLoads; - Optional NumStores; - Optional EndOpcode; // 0 = NOTHING -- StringRef EndOpcodeStr = "UNDEF"; -+ std::string EndOpcodeStr = "UNDEF"; - Optional BasicBlockSize; - std::string FromFunName = "UNDEF"; - uint32_t FromBb; -@@ -76,8 +69,8 @@ private: - /// BranchFeaturesInfo - This structure holds feature information about each - /// two-way branch from the program. - struct BranchFeaturesInfo { -- StringRef OpcodeStr = "UNDEF"; -- StringRef CmpOpcodeStr = "UNDEF"; -+ std::string OpcodeStr = "UNDEF"; -+ std::string CmpOpcodeStr = "UNDEF"; - bool Simple = 0; - - Optional Opcode; -@@ -130,28 +123,28 @@ private: - }; - - typedef std::unique_ptr BFIPtr; -+ - std::vector BranchesInfoSet; - -+ - /// getProcedureType - Determines which category the function falls into: - /// Leaf, Non-leaf or Calls-self. - int8_t getProcedureType(BinaryFunction &Function, BinaryContext &BC); - - /// addSuccessorInfo - Discovers feature information for the target successor - /// basic block, and inserts it into the static branch info container. -- void addSuccessorInfo(DominatorAnalysis &DA, -- DominatorAnalysis &PDA, BFIPtr const &BFI, -- BinaryFunction &Function, BinaryContext &BC, -- MCInst &Inst, BinaryBasicBlock &BB, bool Succ); -+ void addSuccessorInfo(BFIPtr const &BFI, BinaryFunction &Function, -+ BinaryContext &BC, BinaryBasicBlock &BB, bool SuccType); - - /// extractFeatures - Extracts the feature information for each two-way branch - /// from the program. - void extractFeatures(BinaryFunction &Function, -- BinaryContext &BC, -- raw_ostream &Printer); -+ BinaryContext &BC); - -+ void generateInstFeatures(BinaryContext &BC, BinaryBasicBlock &BB, BFIPtr const &BFI, int Index); - /// dumpSuccessorFeatures - Dumps the feature information about the target - /// BasicBlock of either the taken or the fallthrough paths of a given branch. -- void dumpSuccessorFeatures(raw_ostream &Printer, BBIPtr &Successor); -+ void generateSuccessorFeatures(BBIPtr &Successor, BinaryBasicBlockFeature *BBF); - - /// dumpFeatures - Dumps the feature information about each two-way branch - /// from the program. -@@ -167,12 +160,17 @@ public: - explicit FeatureMiner(const cl::opt &PrintPass) - : BinaryFunctionPass(PrintPass) {} - -+ std::ofstream trainPrinter; -+ -+ - const char *getName() const override { return "feature-miner"; } - - void runOnFunctions(BinaryContext &BC) override; -+ void inferenceFeatures(BinaryFunction &Function); -+ void generateProfileFeatures(BinaryBasicBlock *BB, BinaryBasicBlockFeature *BBF); - }; - - } // namespace bolt - } // namespace llvm - --#endif /* LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ */ -\ No newline at end of file -+#endif /* LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ */ -diff --git a/bolt/include/bolt/Passes/StaticBranchInfo.h b/bolt/include/bolt/Passes/StaticBranchInfo.h -index 1713d3367..54a1f7cff 100644 ---- a/bolt/include/bolt/Passes/StaticBranchInfo.h -+++ b/bolt/include/bolt/Passes/StaticBranchInfo.h -@@ -18,16 +18,11 @@ - #ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ - #define LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ - --// #include "BinaryContext.h" --// #include "BinaryFunction.h" --// #include "BinaryLoop.h" - #include "bolt/Core/BinaryContext.h" - #include "bolt/Core/BinaryFunction.h" - #include "bolt/Core/BinaryLoop.h" -- - #include "llvm/MC/MCSymbol.h" --// add new include --#include -+#include - - namespace llvm { - namespace bolt { -@@ -113,4 +108,4 @@ public: - } // namespace bolt - } // namespace llvm - --#endif /* LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ */ -\ No newline at end of file -+#endif /* LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ */ -diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h -index 60cedfeb3..7e2c318fc 100644 ---- a/bolt/include/bolt/Profile/DataReader.h -+++ b/bolt/include/bolt/Profile/DataReader.h -@@ -21,6 +21,7 @@ - #include "llvm/Support/ErrorOr.h" - #include "llvm/Support/MemoryBuffer.h" - #include "llvm/Support/raw_ostream.h" -+#include - #include - #include - -@@ -43,6 +44,13 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LBREntry &LBR) { - return OS; - } - -+extern "C" { -+typedef void *(*CreateONNXRunnerFunc)(const char *); -+typedef void (*DeleteONNXRunnerFunc)(void *); -+typedef float (*RunONNXModelFunc)(void *, std::vector &, -+ std::vector &, std::vector &); -+} -+ - struct Location { - bool IsSymbol; - StringRef Name; -@@ -262,7 +270,8 @@ struct FuncSampleData { - class DataReader : public ProfileReaderBase { - public: - explicit DataReader(StringRef Filename) -- : ProfileReaderBase(Filename), Diag(errs()) {} -+ : ProfileReaderBase(Filename), Diag(errs()), onnxRunner(nullptr), -+ lib_handle(nullptr), handleOnnxRuntime(nullptr) {} - - StringRef getReaderName() const override { return "branch profile reader"; } - -@@ -281,7 +290,70 @@ public: - /// Return all event names used to collect this profile - virtual StringSet<> getEventNames() const override { return EventNames; } - -+ ~DataReader() { -+ // delete onnxrunner; -+ if (onnxRunner && lib_handle && handleOnnxRuntime) { -+ DeleteONNXRunnerFunc deleteONNXRunner = -+ (DeleteONNXRunnerFunc)dlsym(lib_handle, "deleteONNXRunner"); -+ deleteONNXRunner(onnxRunner); -+ dlclose(lib_handle); -+ dlclose(handleOnnxRuntime); -+ } -+ } -+ -+ /// Initialize the onnxruntime model. -+ void initializeONNXRunner(const std::string &modelPath) { -+ if (!onnxRunner && !lib_handle && !handleOnnxRuntime) { -+ handleOnnxRuntime = -+ dlopen("libonnxruntime.so.1.16.3", RTLD_LAZY | RTLD_GLOBAL); -+ lib_handle = -+ dlopen("libONNXRunner.so", RTLD_LAZY); -+ CreateONNXRunnerFunc createONNXRunner = -+ (CreateONNXRunnerFunc)dlsym(lib_handle, "createONNXRunner"); -+ onnxRunner = createONNXRunner(modelPath.c_str()); -+ } -+ } -+ -+ /// Inference step for predicting the BB counts based on the BB features. -+ float ONNXInference(std::vector &input_string, -+ std::vector &input_int64, -+ std::vector &input_float) { -+ if (onnxRunner && lib_handle) { -+ RunONNXModelFunc runONNXModel = -+ (RunONNXModelFunc)dlsym(lib_handle, "runONNXModel"); -+ float model_pred = -+ runONNXModel(onnxRunner, input_string, input_int64, input_float); -+ return model_pred; -+ } -+ return -1.0; -+ } -+ - protected: -+ /// The onnxruntime model pointer read from the input model path. -+ void *onnxRunner; -+ -+ /// The library handle of the ai4compiler framwork. -+ void *lib_handle; -+ -+ /// The library handle of the onnxruntime. -+ void *handleOnnxRuntime; -+ -+ /// The annotating threshold for the model prediction. -+ float threshold = 0.95; -+ -+ /// Return the annotating threshold for the model prediction. -+ float getThreshold() const { return threshold; } -+ -+ /// The counting value of the total modified BB-count number. -+ uint64_t modified_BB_total = 0; -+ -+ /// Add the total modified BB-count number by the BB modifiied number within -+ /// the funciton. -+ void addModifiedBBTotal(uint64_t &value) { modified_BB_total += value; } -+ -+ /// Return the counting value of the total modified BB-count number. -+ uint64_t getModifiedBBTotal() const { return modified_BB_total; } -+ - /// Read profile information available for the function. - void readProfile(BinaryFunction &BF); - -diff --git a/bolt/lib/Core/BinaryBasicBlockFeature.cpp b/bolt/lib/Core/BinaryBasicBlockFeature.cpp -new file mode 100644 -index 000000000..81d8c7546 ---- /dev/null -+++ b/bolt/lib/Core/BinaryBasicBlockFeature.cpp -@@ -0,0 +1,25 @@ -+//===- bolt/Core/BinaryBasicBlockFeature.cpp - Low-level basic block -------------===// -+// -+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -+// See https://llvm.org/LICENSE.txt for license information. -+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+// -+//===----------------------------------------------------------------------===// -+// -+// This file implements the BinaryBasicBlock class. -+// -+//===----------------------------------------------------------------------===// -+ -+#include "bolt/Core/BinaryBasicBlock.h" -+#include "bolt/Core/BinaryBasicBlockFeature.h" -+ -+ -+#define DEBUG_TYPE "bolt" -+ -+namespace llvm { -+namespace bolt { -+ -+ -+ -+} // namespace bolt -+} // namespace llvm -\ No newline at end of file -diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt -index 501b5eb6e..25e9fb020 100644 ---- a/bolt/lib/Core/CMakeLists.txt -+++ b/bolt/lib/Core/CMakeLists.txt -@@ -9,6 +9,7 @@ set(LLVM_LINK_COMPONENTS - - add_llvm_library(LLVMBOLTCore - BinaryBasicBlock.cpp -+ BinaryBasicBlockFeature.cpp - BinaryContext.cpp - BinaryData.cpp - BinaryEmitter.cpp -diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt -index 901ff614c..9f5abd101 100644 ---- a/bolt/lib/Passes/CMakeLists.txt -+++ b/bolt/lib/Passes/CMakeLists.txt -@@ -12,9 +12,9 @@ add_llvm_library(LLVMBOLTPasses - DataflowAnalysis.cpp - DataflowInfoManager.cpp - ExtTSPReorderAlgorithm.cpp -- FeatureMiner.cpp - FrameAnalysis.cpp - FrameOptimizer.cpp -+ FeatureMiner.cpp - HFSort.cpp - HFSortPlus.cpp - IdenticalCodeFolding.cpp -diff --git a/bolt/lib/Passes/FeatureMiner.cpp b/bolt/lib/Passes/FeatureMiner.cpp -index 680222906..abee69745 100644 ---- a/bolt/lib/Passes/FeatureMiner.cpp -+++ b/bolt/lib/Passes/FeatureMiner.cpp -@@ -11,15 +11,10 @@ - // https://dl.acm.org/doi/10.1145/239912.239923 - //===----------------------------------------------------------------------===// - --// #include "Passes/FeatureMiner.h" --// #include "Passes/DataflowInfoManager.h" --// #include "llvm/Support/CommandLine.h" --// #include "llvm/Support/Options.h" - #include "bolt/Passes/FeatureMiner.h" - #include "bolt/Passes/DataflowInfoManager.h" -+#include "bolt/Passes/StaticBranchInfo.h" - #include "llvm/Support/CommandLine.h" -- --// add new include - #include "llvm/Support/FileSystem.h" - - #undef DEBUG_TYPE -@@ -29,24 +24,7 @@ using namespace llvm; - using namespace bolt; - - namespace opts { -- --extern cl::OptionCategory InferenceCategory; -- --cl::opt VespaUseDFS( -- "vespa-dfs", -- cl::desc("use DFS ordering when using -gen-features option"), -- cl::init(false), -- cl::ReallyHidden, -- cl::ZeroOrMore, -- cl::cat(InferenceCategory)); -- --cl::opt IncludeValidProfile( -- "beetle-valid-profile-info", -- cl::desc("include valid profile information."), -- cl::init(false), -- cl::ReallyHidden, -- cl::ZeroOrMore, -- cl::cat(InferenceCategory)); -+extern cl::opt BlockCorrection; - - } // namespace opts - -@@ -75,50 +53,19 @@ int8_t FeatureMiner::getProcedureType(BinaryFunction &Function, - return ProcedureType; // leaf type - } - --void FeatureMiner::addSuccessorInfo(DominatorAnalysis &DA, -- DominatorAnalysis &PDA, -- BFIPtr const &BFI, BinaryFunction &Function, -- BinaryContext &BC, MCInst &Inst, -- BinaryBasicBlock &BB, bool SuccType) { -+void FeatureMiner::addSuccessorInfo(BFIPtr const &BFI, BinaryFunction &Function, -+ BinaryContext &BC, BinaryBasicBlock &BB, bool SuccType) { - - BinaryBasicBlock *Successor = BB.getConditionalSuccessor(SuccType); - - if (!Successor) - return; - -- unsigned NumLoads{0}; -- unsigned NumStores{0}; -- unsigned NumCallsExit{0}; - unsigned NumCalls{0}; -- unsigned NumCallsInvoke{0}; -- unsigned NumTailCalls{0}; -- unsigned NumIndirectCalls{0}; - - for (auto &Inst : BB) { -- if (BC.MIB->isLoad(Inst)) { -- ++NumLoads; -- } else if (BC.MIB->isStore(Inst)) { -- ++NumStores; -- } else if (BC.MIB->isCall(Inst)) { -+ if (BC.MIB->isCall(Inst)) { - ++NumCalls; -- -- if (BC.MIB->isIndirectCall(Inst)) -- ++NumIndirectCalls; -- -- if (BC.MIB->isInvoke(Inst)) -- ++NumCallsInvoke; -- -- if (BC.MIB->isTailCall(Inst)) -- ++NumTailCalls; -- -- if (const auto *CalleeSymbol = BC.MIB->getTargetSymbol(Inst)) { -- StringRef CalleeName = CalleeSymbol->getName(); -- if (CalleeName == "__cxa_throw@PLT" || -- CalleeName == "_Unwind_Resume@PLT" || -- CalleeName == "__cxa_rethrow@PLT" || CalleeName == "exit@PLT" || -- CalleeName == "abort@PLT") -- ++NumCallsExit; -- } - } - } - -@@ -138,117 +85,78 @@ void FeatureMiner::addSuccessorInfo(DominatorAnalysis &DA, - SuccBBInfo->Backedge = SBI->isBackEdge(&BB, Successor); - - MCInst *SuccInst = Successor->getTerminatorBefore(nullptr); -+ - // Store information about the branch type ending sucessor basic block - SuccBBInfo->EndOpcode = (SuccInst && BC.MIA->isBranch(*SuccInst)) - ? SuccInst->getOpcode() - : 0; // 0 = NOTHING -- if (SuccBBInfo->EndOpcode != 0) -- SuccBBInfo->EndOpcodeStr = BC.MII->getName(SuccInst->getOpcode()); -- else -- SuccBBInfo->EndOpcodeStr = "NOTHING"; - - // Check if the successor basic block contains - // a procedure call and store it. - SuccBBInfo->Call = (NumCalls > 0) ? 1 // Contains a call instruction - : 0; // Does not contain a call instruction - -- SuccBBInfo->NumStores = NumStores; -- SuccBBInfo->NumLoads = NumLoads; -- SuccBBInfo->NumCallsExit = NumCallsExit; -- SuccBBInfo->NumCalls = NumCalls; -- -- SuccBBInfo->NumCallsInvoke = NumCallsInvoke; -- SuccBBInfo->NumIndirectCalls = NumIndirectCalls; -- SuccBBInfo->NumTailCalls = NumTailCalls; -- -- auto InstSucc = Successor->getLastNonPseudoInstr(); -- if (InstSucc) { -- // Check if the source basic block dominates its -- // target basic block and store it. -- SuccBBInfo->BranchDominates = (DA.doesADominateB(Inst, *InstSucc) == true) -- ? 1 // Dominates -- : 0; // Does not dominate -- -- // Check if the target basic block postdominates -- // the source basic block and store it. -- SuccBBInfo->BranchPostdominates = -- (PDA.doesADominateB(*InstSucc, Inst) == true) -- ? 1 // Postdominates -- : 0; // Does not postdominate -- } - -- /// The follwoing information is used as an identifier only for -- /// the purpose of matching the inferred probabilities with the branches -- /// in the binary. -- SuccBBInfo->FromFunName = Function.getPrintName(); -- SuccBBInfo->FromBb = BB.getInputOffset(); -- BinaryFunction *ToFun = Successor->getFunction(); -- SuccBBInfo->ToFunName = ToFun->getPrintName(); -- SuccBBInfo->ToBb = Successor->getInputOffset(); -- -- auto Offset = BC.MIB->tryGetAnnotationAs(Inst, "Offset"); -- if (Offset) { -- uint32_t TargetOffset = Successor->getInputOffset(); -- uint32_t BranchOffset = Offset.get(); -- BFI->BranchOffset = BranchOffset; -- if (BranchOffset != UINT32_MAX && TargetOffset != UINT32_MAX) { -- int64_t Delta = TargetOffset - BranchOffset; -- BFI->DeltaTaken = std::abs(Delta); -- } -- } -+ uint32_t Offset = BB.getEndOffset(); - - if (SuccType) { - BFI->TrueSuccessor = std::move(SuccBBInfo); -- - // Check if the taken branch is a forward -- // or a backwards branch and store it. -+ // or a backwards branch and store it - BFI->Direction = (Function.isForwardBranch(&BB, Successor) == true) -- ? 1 // Forward branch -- : 0; // Backwards branch -- -- auto TakenBranchInfo = BB.getTakenBranchInfo(); -- BFI->Count = TakenBranchInfo.Count; -- BFI->MissPredicted = TakenBranchInfo.MispredictedCount; -+ ? 1 // Forward branch -+ : 0; // Backwards branch -+ -+ auto OnlyBranchInfo = BB.getOnlyBranchInfo(); -+ BFI->Count = OnlyBranchInfo.Count; -+ -+ if (Offset) { -+ uint32_t TargetOffset = Successor->getInputOffset(); -+ uint32_t BranchOffset = Offset; -+ if (BranchOffset != UINT32_MAX && TargetOffset != UINT32_MAX) { -+ int64_t Delta = static_cast(TargetOffset) - static_cast(BranchOffset); -+ BFI->DeltaTaken = std::abs(Delta); -+ } -+ } - } else { -+ if (BB.succ_size() == 2) { -+ auto FallthroughBranchInfo = BB.getFallthroughBranchInfo(); -+ BFI->FallthroughCount = FallthroughBranchInfo.Count; -+ } else { -+ auto OnlyBranchInfo = BB.getOnlyBranchInfo(); -+ BFI->FallthroughCount = OnlyBranchInfo.Count; -+ } - BFI->FalseSuccessor = std::move(SuccBBInfo); -- -- auto FallthroughBranchInfo = BB.getFallthroughBranchInfo(); -- BFI->FallthroughCount = FallthroughBranchInfo.Count; -- BFI->FallthroughMissPredicted = FallthroughBranchInfo.MispredictedCount; - } - } - --void FeatureMiner::extractFeatures(BinaryFunction &Function, BinaryContext &BC, -- raw_ostream &Printer) { -+void FeatureMiner::extractFeatures(BinaryFunction &Function, BinaryContext &BC) { - int8_t ProcedureType = getProcedureType(Function, BC); --// auto Info = DataflowInfoManager(BC, Function, nullptr, nullptr); - auto Info = DataflowInfoManager(Function, nullptr, nullptr); -- auto &DA = Info.getDominatorAnalysis(); -- auto &PDA = Info.getPostDominatorAnalysis(); - const BinaryLoopInfo &LoopsInfo = Function.getLoopInfo(); -- bool Simple = Function.isSimple(); - --// const auto &Order = opts::VespaUseDFS ? Function.dfs() : Function.getLayout(); -+ bool Simple = Function.isSimple(); - const auto &Order = Function.dfs(); -+ std::string Function_name = Function.getPrintName(); -+ - - for (auto *BBA : Order) { -- -+ - auto &BB = *BBA; -- unsigned NumOuterLoops{0}; -+ -+ BinaryBasicBlockFeature BBF = BB.getFeatures(); -+ - unsigned TotalLoops{0}; -- unsigned MaximumLoopDepth{0}; - unsigned LoopDepth{0}; -- unsigned LoopNumExitEdges{0}; -- unsigned LoopNumExitBlocks{0}; -- unsigned LoopNumExitingBlocks{0}; -- unsigned LoopNumLatches{0}; - unsigned LoopNumBlocks{0}; -- unsigned LoopNumBackEdges{0}; - - bool LocalExitingBlock{false}; - bool LocalLatchBlock{false}; - bool LocalLoopHeader{false}; - -+ -+ generateProfileFeatures(&BB, &BBF); -+ - BinaryLoop *Loop = LoopsInfo.getLoopFor(&BB); - if (Loop) { - SmallVector ExitingBlocks; -@@ -263,414 +171,301 @@ void FeatureMiner::extractFeatures(BinaryFunction &Function, BinaryContext &BC, - SmallVector Latches; - Loop->getLoopLatches(Latches); - -- NumOuterLoops = LoopsInfo.OuterLoops; - TotalLoops = LoopsInfo.TotalLoops; -- MaximumLoopDepth = LoopsInfo.MaximumDepth; - LoopDepth = Loop->getLoopDepth(); -- LoopNumExitEdges = ExitEdges.size(); -- LoopNumExitBlocks = ExitBlocks.size(); -- LoopNumExitingBlocks = ExitingBlocks.size(); -- LoopNumLatches = Latches.size(); - LoopNumBlocks = Loop->getNumBlocks(); -- LoopNumBackEdges = Loop->getNumBackEdges(); -- - LocalExitingBlock = Loop->isLoopExiting(&BB); - LocalLatchBlock = Loop->isLoopLatch(&BB); - LocalLoopHeader = ((Loop->getHeader() == (&BB)) ? 1 : 0); - } - - unsigned NumLoads{0}; -- unsigned NumStores{0}; -- unsigned NumCallsExit{0}; - unsigned NumCalls{0}; -- unsigned NumCallsInvoke{0}; -- unsigned NumTailCalls{0}; - unsigned NumIndirectCalls{0}; -- unsigned NumSelfCalls{0}; - - for (auto &Inst : BB) { - if (BC.MIB->isLoad(Inst)) { - ++NumLoads; -- } else if (BC.MIB->isStore(Inst)) { -- ++NumStores; - } else if (BC.MIB->isCall(Inst)) { - ++NumCalls; -- - if (BC.MIB->isIndirectCall(Inst)) - ++NumIndirectCalls; -- -- if (BC.MIB->isInvoke(Inst)) -- ++NumCallsInvoke; -- -- if (BC.MIB->isTailCall(Inst)) -- ++NumTailCalls; -- -- if (const auto *CalleeSymbol = BC.MIB->getTargetSymbol(Inst)) { -- StringRef CalleeName = CalleeSymbol->getName(); -- if (CalleeName == "__cxa_throw@PLT" || -- CalleeName == "_Unwind_Resume@PLT" || -- CalleeName == "__cxa_rethrow@PLT" || CalleeName == "exit@PLT" || -- CalleeName == "abort@PLT") -- ++NumCallsExit; -- else if (CalleeName == Function.getPrintName()) { -- ++NumSelfCalls; -- } -- } - } - } - - int Index = -2; - bool LoopHeader = SBI->isLoopHeader(&BB); -+ -+ BFIPtr BFI = std::make_unique(); -+ -+ -+ BFI->TotalLoops = TotalLoops; -+ BFI->LoopDepth = LoopDepth; -+ BFI->LoopNumBlocks = LoopNumBlocks; -+ BFI->LocalExitingBlock = LocalExitingBlock; -+ BFI->LocalLatchBlock = LocalLatchBlock; -+ BFI->LocalLoopHeader = LocalLoopHeader; -+ BFI->NumCalls = NumCalls; -+ BFI->BasicBlockSize = BB.size(); -+ BFI->NumBasicBlocks = Function.size(); -+ -+ BFI->NumLoads = NumLoads; -+ BFI->NumIndirectCalls = NumIndirectCalls; -+ BFI->LoopHeader = LoopHeader; -+ BFI->ProcedureType = ProcedureType; -+ -+ // Adding taken successor info. -+ addSuccessorInfo(BFI, Function, BC, BB, true); -+ // Adding fall through successor info. -+ addSuccessorInfo(BFI, Function, BC, BB, false); -+ -+ MCInst ConditionalInst; -+ bool hasConditionalBranch = false; -+ MCInst UnconditionalInst; -+ bool hasUnconditionalBranch = false; -+ - for (auto &Inst : BB) { - ++Index; -- -- if (!BC.MIA->isConditionalBranch(Inst)) -+ if (!BC.MIA->isConditionalBranch(Inst) && !BC.MIA->isUnconditionalBranch(Inst)) - continue; - -- BFIPtr BFI = std::make_unique(); -- -- BFI->Simple = Simple; -- BFI->NumOuterLoops = NumOuterLoops; -- BFI->TotalLoops = TotalLoops; -- BFI->MaximumLoopDepth = MaximumLoopDepth; -- BFI->LoopDepth = LoopDepth; -- BFI->LoopNumExitEdges = LoopNumExitEdges; -- BFI->LoopNumExitBlocks = LoopNumExitBlocks; -- BFI->LoopNumExitingBlocks = LoopNumExitingBlocks; -- BFI->LoopNumLatches = LoopNumLatches; -- BFI->LoopNumBlocks = LoopNumBlocks; -- BFI->LoopNumBackEdges = LoopNumBackEdges; -- -- BFI->LocalExitingBlock = LocalExitingBlock; -- BFI->LocalLatchBlock = LocalLatchBlock; -- BFI->LocalLoopHeader = LocalLoopHeader; -- -- BFI->Call = ((NumCalls > 0) ? 1 : 0); -- BFI->NumCalls = NumCalls; -- -- BFI->BasicBlockSize = BB.size(); -- BFI->NumBasicBlocks = Function.size(); -- BFI->NumSelfCalls = NumSelfCalls; -- -- BFI->NumLoads = NumLoads; -- BFI->NumStores = NumStores; -- BFI->NumCallsExit = NumCallsExit; -- -- BFI->NumCallsInvoke = NumCallsInvoke; -- BFI->NumIndirectCalls = NumIndirectCalls; -- BFI->NumTailCalls = NumTailCalls; -- -- // Check if branch's basic block is a loop header and store it. -- BFI->LoopHeader = LoopHeader; -- -- // Adding taken successor info. -- addSuccessorInfo(DA, PDA, BFI, Function, BC, Inst, BB, true); -- // Adding fall through successor info. -- addSuccessorInfo(DA, PDA, BFI, Function, BC, Inst, BB, false); -- -- // Holds the branch opcode info. -- BFI->Opcode = Inst.getOpcode(); -- BFI->OpcodeStr = BC.MII->getName(Inst.getOpcode()); -- -- // Holds the branch's procedure type. -- BFI->ProcedureType = ProcedureType; -- -- BFI->CmpOpcode = 0; -- if (Index > -1) { -- auto Cmp = BB.begin() + Index; -- -- if (BC.MII->get((*Cmp).getOpcode()).isCompare()) { -- // Holding the branch comparison opcode info. -- BFI->CmpOpcode = (*Cmp).getOpcode(); -+ generateInstFeatures(BC, BB, BFI, Index); - -- BFI->CmpOpcodeStr = BC.MII->getName((*Cmp).getOpcode()); -- -- auto getOperandType = [&](const MCOperand &Operand) -> int32_t { -- if (Operand.isReg()) -- return 0; -- else if (Operand.isImm()) -- return 1; -- // else if (Operand.isFPImm()) -- else if (Operand.isSFPImm()) -- return 2; -- else if (Operand.isExpr()) -- return 3; -- else -- return -1; -- }; -- -- const auto InstInfo = BC.MII->get((*Cmp).getOpcode()); -- unsigned NumDefs = InstInfo.getNumDefs(); -- int32_t NumPrimeOperands = -- MCPlus::getNumPrimeOperands(*Cmp) - NumDefs; -- switch (NumPrimeOperands) { -- case 6: { -- int32_t RBType = getOperandType((*Cmp).getOperand(NumDefs)); -- int32_t RAType = getOperandType((*Cmp).getOperand(NumDefs + 1)); -- -- if (RBType == 0 && RAType == 0) { -- BFI->OperandRBType = RBType; -- BFI->OperandRAType = RAType; -- } else if (RBType == 0 && (RAType == 1 || RAType == 2)) { -- RAType = getOperandType((*Cmp).getOperand(NumPrimeOperands - 1)); -- -- if (RAType != 1 && RAType != 2) { -- RAType = -1; -- } -- -- BFI->OperandRBType = RBType; -- BFI->OperandRAType = RAType; -- } else { -- BFI->OperandRAType = -1; -- BFI->OperandRBType = -1; -- } -- break; -- } -- case 2: -- BFI->OperandRBType = getOperandType((*Cmp).getOperand(NumDefs)); -- BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs + 1)); -- break; -- case 3: -- BFI->OperandRBType = getOperandType((*Cmp).getOperand(NumDefs)); -- BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs + 2)); -- break; -- case 1: -- BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs)); -- break; -- default: -- BFI->OperandRAType = -1; -- BFI->OperandRBType = -1; -- break; -- } -- -- } else { -- Index -= 1; -- for (int Idx = Index; Idx > -1; Idx--) { -- auto Cmp = BB.begin() + Idx; -- if (BC.MII->get((*Cmp).getOpcode()).isCompare()) { -- // Holding the branch comparison opcode info. -- BFI->CmpOpcode = (*Cmp).getOpcode(); -- BFI->CmpOpcodeStr = BC.MII->getName((*Cmp).getOpcode()); -- break; -- } -- } -- } -+ if (BC.MIA->isConditionalBranch(Inst)) { -+ ConditionalInst = Inst; -+ hasConditionalBranch = true; - } -+ -+ -+ if (BC.MIA->isUnconditionalBranch(Inst)){ -+ UnconditionalInst = Inst; -+ hasUnconditionalBranch = true; -+ } -+ -+ } - -- //======================================================================== -- -- auto &FalseSuccessor = BFI->FalseSuccessor; -- auto &TrueSuccessor = BFI->TrueSuccessor; -+ if (hasConditionalBranch) { -+ BFI->Opcode = ConditionalInst.getOpcode(); - -- if (!FalseSuccessor && !TrueSuccessor) -- continue; -+ } else { -+ if (hasUnconditionalBranch) { -+ BFI->Opcode = UnconditionalInst.getOpcode(); - -- int64_t BranchOffset = -- (BFI->BranchOffset.hasValue()) -- ? static_cast(*(BFI->BranchOffset)) -- : -1; -- if(BranchOffset == -1) -- continue; -+ } else { -+ auto Inst = BB.getLastNonPseudoInstr(); -+ BFI->Opcode = Inst->getOpcode(); -+ generateInstFeatures(BC, BB, BFI, Index); -+ } -+ } -+ -+ auto &FalseSuccessor = BFI->FalseSuccessor; -+ auto &TrueSuccessor = BFI->TrueSuccessor; - -- int16_t ProcedureType = (BFI->ProcedureType.hasValue()) -+ int16_t ProcedureType = (BFI->ProcedureType.hasValue()) - ? static_cast(*(BFI->ProcedureType)) - : -1; - -- int16_t Direction = (BFI->Direction.hasValue()) -- ? static_cast(*(BFI->Direction)) -- : -1; -+ int64_t Count = (BFI->Count.hasValue()) ? static_cast(*(BFI->Count)) : -1; - -- int16_t LoopHeader = (BFI->LoopHeader.hasValue()) -- ? static_cast(*(BFI->LoopHeader)) -- : -1; -- -- int32_t Opcode = -- (BFI->Opcode.hasValue()) ? static_cast(*(BFI->Opcode)) : -1; -- -- int32_t CmpOpcode = (BFI->CmpOpcode.hasValue()) -- ? static_cast(*(BFI->CmpOpcode)) -- : -1; -- -- int64_t Count = -- (BFI->Count.hasValue()) ? static_cast(*(BFI->Count)) : -1; -- -- int64_t MissPredicted = (BFI->MissPredicted.hasValue()) -- ? static_cast(*(BFI->MissPredicted)) -- : -1; -- -- int64_t FallthroughCount = -- (BFI->FallthroughCount.hasValue()) -+ int64_t FallthroughCount = (BFI->FallthroughCount.hasValue()) - ? static_cast(*(BFI->FallthroughCount)) - : -1; - -- int64_t FallthroughMissPredicted = -- (BFI->FallthroughMissPredicted.hasValue()) -- ? static_cast(*(BFI->FallthroughMissPredicted)) -- : -1; -- -- int64_t NumOuterLoops = (BFI->NumOuterLoops.hasValue()) -- ? static_cast(*(BFI->NumOuterLoops)) -- : -1; -- int64_t TotalLoops = (BFI->TotalLoops.hasValue()) -- ? static_cast(*(BFI->TotalLoops)) -+ int16_t LoopHeaderValid = (BFI->LoopHeader.hasValue()) -+ ? static_cast(*(BFI->LoopHeader)) - : -1; -- int64_t MaximumLoopDepth = -- (BFI->MaximumLoopDepth.hasValue()) -- ? static_cast(*(BFI->MaximumLoopDepth)) -- : -1; -- int64_t LoopDepth = (BFI->LoopDepth.hasValue()) -+ -+ int64_t TotalLoopsValid = (BFI->TotalLoops.hasValue()) -+ ? static_cast(*(BFI->TotalLoops)) -+ : -1; -+ int64_t LoopDepthValid = (BFI->LoopDepth.hasValue()) - ? static_cast(*(BFI->LoopDepth)) - : -1; -- int64_t LoopNumExitEdges = -- (BFI->LoopNumExitEdges.hasValue()) -- ? static_cast(*(BFI->LoopNumExitEdges)) -- : -1; -- int64_t LoopNumExitBlocks = -- (BFI->LoopNumExitBlocks.hasValue()) -- ? static_cast(*(BFI->LoopNumExitBlocks)) -- : -1; -- int64_t LoopNumExitingBlocks = -- (BFI->LoopNumExitingBlocks.hasValue()) -- ? static_cast(*(BFI->LoopNumExitingBlocks)) -- : -1; -- int64_t LoopNumLatches = -- (BFI->LoopNumLatches.hasValue()) -- ? static_cast(*(BFI->LoopNumLatches)) -- : -1; -- int64_t LoopNumBlocks = (BFI->LoopNumBlocks.hasValue()) -- ? static_cast(*(BFI->LoopNumBlocks)) -- : -1; -- int64_t LoopNumBackEdges = -- (BFI->LoopNumBackEdges.hasValue()) -- ? static_cast(*(BFI->LoopNumBackEdges)) -- : -1; -+ int64_t LoopNumBlocksValid = (BFI->LoopNumBlocks.hasValue()) -+ ? static_cast(*(BFI->LoopNumBlocks)) -+ : -1; -+ int64_t LocalExitingBlockValid = -+ (BFI->LocalExitingBlock.hasValue()) -+ ? static_cast(*(BFI->LocalExitingBlock)) -+ : -1; - -- int64_t LocalExitingBlock = -- (BFI->LocalExitingBlock.hasValue()) -- ? static_cast(*(BFI->LocalExitingBlock)) -- : -1; -+ int64_t LocalLatchBlockValid = (BFI->LocalLatchBlock.hasValue()) -+ ? static_cast(*(BFI->LocalLatchBlock)) -+ : -1; - -- int64_t LocalLatchBlock = (BFI->LocalLatchBlock.hasValue()) -- ? static_cast(*(BFI->LocalLatchBlock)) -- : -1; -+ int64_t LocalLoopHeaderValid = (BFI->LocalLoopHeader.hasValue()) -+ ? static_cast(*(BFI->LocalLoopHeader)) -+ : -1; - -- int64_t LocalLoopHeader = (BFI->LocalLoopHeader.hasValue()) -- ? static_cast(*(BFI->LocalLoopHeader)) -- : -1; -+ int32_t CmpOpcode = (BFI->CmpOpcode.hasValue()) -+ ? static_cast(*(BFI->CmpOpcode)) -+ : -1; - -- int64_t Call = -- (BFI->Call.hasValue()) ? static_cast(*(BFI->Call)) : -1; -+ int64_t OperandRAType = (BFI->OperandRAType.hasValue()) -+ ? static_cast(*(BFI->OperandRAType)) -+ : 10; - -- int64_t DeltaTaken = (BFI->DeltaTaken.hasValue()) -- ? static_cast(*(BFI->DeltaTaken)) -- : -1; -+ int64_t OperandRBType = (BFI->OperandRBType.hasValue()) -+ ? static_cast(*(BFI->OperandRBType)) -+ : 10; -+ int16_t Direction = (BFI->Direction.hasValue()) -+ ? static_cast(*(BFI->Direction)) -+ : -1; - -- int64_t NumLoads = (BFI->NumLoads.hasValue()) -- ? static_cast(*(BFI->NumLoads)) -- : -1; -+ int64_t DeltaTaken = (BFI->DeltaTaken.hasValue()) -+ ? static_cast(*(BFI->DeltaTaken)) -+ : -1; - -- int64_t NumStores = (BFI->NumStores.hasValue()) -- ? static_cast(*(BFI->NumStores)) -- : -1; -+ int64_t NumLoadsValid = (BFI->NumLoads.hasValue()) -+ ? static_cast(*(BFI->NumLoads)) -+ : -1; - -- int64_t BasicBlockSize = -- (BFI->BasicBlockSize.hasValue()) -- ? static_cast(*(BFI->BasicBlockSize)) -- : -1; -+ int64_t BasicBlockSize = -+ (BFI->BasicBlockSize.hasValue()) -+ ? static_cast(*(BFI->BasicBlockSize)) -+ : -1; - -- int64_t NumBasicBlocks = -- (BFI->NumBasicBlocks.hasValue()) -- ? static_cast(*(BFI->NumBasicBlocks)) -- : -1; -+ int64_t NumBasicBlocks = -+ (BFI->NumBasicBlocks.hasValue()) -+ ? static_cast(*(BFI->NumBasicBlocks)) -+ : -1; - -- int64_t NumCalls = (BFI->NumCalls.hasValue()) -+ int64_t NumCallsValid = (BFI->NumCalls.hasValue()) - ? static_cast(*(BFI->NumCalls)) - : -1; - -- int64_t NumSelfCalls = (BFI->NumSelfCalls.hasValue()) -- ? static_cast(*(BFI->NumSelfCalls)) -- : -1; -- -- int64_t NumCallsExit = (BFI->NumCallsExit.hasValue()) -- ? static_cast(*(BFI->NumCallsExit)) -- : -1; -- -- int64_t OperandRAType = (BFI->OperandRAType.hasValue()) -- ? static_cast(*(BFI->OperandRAType)) -- : -1; -- -- int64_t OperandRBType = (BFI->OperandRBType.hasValue()) -- ? static_cast(*(BFI->OperandRBType)) -- : -1; -- -- int64_t NumCallsInvoke = -- (BFI->NumCallsInvoke.hasValue()) -- ? static_cast(*(BFI->NumCallsInvoke)) -- : -1; -- -- int64_t NumIndirectCalls = -+ int64_t NumIndirectCallsValid = - (BFI->NumIndirectCalls.hasValue()) - ? static_cast(*(BFI->NumIndirectCalls)) - : -1; - -- int64_t NumTailCalls = (BFI->NumTailCalls.hasValue()) -- ? static_cast(*(BFI->NumTailCalls)) -- : -1; -- -- Printer << BFI->Simple << "," << Opcode << "," << BFI->OpcodeStr << "," -- << Direction << "," << CmpOpcode << "," << BFI->CmpOpcodeStr -- << "," << LoopHeader << "," << ProcedureType << "," << Count -- << "," << MissPredicted << "," << FallthroughCount << "," -- << FallthroughMissPredicted << "," << NumOuterLoops << "," -- << NumCallsExit << "," << TotalLoops << "," << MaximumLoopDepth -- << "," << LoopDepth << "," << LoopNumExitEdges << "," -- << LoopNumExitBlocks << "," << LoopNumExitingBlocks << "," -- << LoopNumLatches << "," << LoopNumBlocks << "," -- << LoopNumBackEdges << "," << LocalExitingBlock << "," -- << LocalLatchBlock << "," << LocalLoopHeader << "," << Call << "," -- << DeltaTaken << "," << NumLoads << "," << NumStores << "," -- << NumCalls << "," << OperandRAType << "," << OperandRBType << "," -- << BasicBlockSize << "," << NumBasicBlocks << "," -- << NumCallsInvoke << "," << NumIndirectCalls << "," -- << NumTailCalls << "," << NumSelfCalls; -- -- if (FalseSuccessor && TrueSuccessor) { -- dumpSuccessorFeatures(Printer, TrueSuccessor); -- dumpSuccessorFeatures(Printer, FalseSuccessor); -- -- FalseSuccessor.reset(); -- TrueSuccessor.reset(); -- } -- BFI.reset(); -- -- std::string BranchOffsetStr = (BranchOffset == -1) ? "None" : Twine::utohexstr(BranchOffset).str(); -+ int64_t HasIndirectCalls = (NumIndirectCallsValid > 0) ? 1 : 0; -+ -+ int32_t Opcode = (BFI->Opcode.hasValue()) ? static_cast(*(BFI->Opcode)) : -1; -+ -+ uint64_t fun_exec = Function.getExecutionCount(); -+ fun_exec = (fun_exec != UINT64_MAX) ? fun_exec : 0; -+ -+ BBF.setDirection(Direction); -+ BBF.setDeltaTaken(DeltaTaken); -+ BBF.setOpcode(Opcode); -+ BBF.setCmpOpcode(CmpOpcode); -+ BBF.setOperandRAType(OperandRAType); -+ BBF.setOperandRBType(OperandRBType); -+ BBF.setFunExec(fun_exec); -+ BBF.setTotalLoops(TotalLoopsValid); -+ BBF.setLoopDepth(LoopDepthValid); -+ BBF.setLoopNumBlocks(LoopNumBlocksValid); -+ BBF.setLocalExitingBlock(LocalExitingBlockValid); -+ BBF.setLocalLatchBlock(LocalLatchBlockValid); -+ BBF.setLocalLoopHeader(LocalLoopHeaderValid); -+ BBF.setNumCalls(NumCallsValid); -+ BBF.setBasicBlockSize(BasicBlockSize); -+ BBF.setNumBasicBlocks(NumBasicBlocks); -+ BBF.setNumLoads(NumLoadsValid); -+ BBF.setHasIndirectCalls(HasIndirectCalls); -+ BBF.setLoopHeader(LoopHeaderValid); -+ BBF.setProcedureType(ProcedureType); -+ BBF.setCount(Count); -+ BBF.setFallthroughCount(FallthroughCount); -+ -+ -+ generateSuccessorFeatures(TrueSuccessor, &BBF); -+ generateSuccessorFeatures(FalseSuccessor, &BBF); -+ -+ FalseSuccessor.reset(); -+ TrueSuccessor.reset(); -+ -+ BBF.setInferenceFeatures(); -+ BB.setFeatures(BBF); -+ -+ BFI.reset(); -+ } -+} - -- uint64_t fun_exec = Function.getExecutionCount(); -- fun_exec = (fun_exec != UINT64_MAX) ? fun_exec : 0; -- Printer << "," << Twine::utohexstr(Function.getAddress()) << "," -- << fun_exec << "," << Function.getFunctionNumber() << "," -- << Function.getOneName() << "," << Function.getPrintName() -- << "," << BranchOffsetStr -- << "\n"; -+void FeatureMiner::generateInstFeatures(BinaryContext &BC, BinaryBasicBlock &BB, BFIPtr const &BFI, int Index) { -+ -+ // Holds the branch opcode info. -+ -+ BFI->CmpOpcode = 0; -+ if (Index > -1) { -+ auto Cmp = BB.begin() + Index; -+ if (BC.MII->get((*Cmp).getOpcode()).isCompare()) { -+ // Holding the branch comparison opcode info. -+ BFI->CmpOpcode = (*Cmp).getOpcode(); -+ auto getOperandType = [&](const MCOperand &Operand) -> int32_t { -+ if (Operand.isReg()) -+ return 0; -+ else if (Operand.isImm()) -+ return 1; -+ else if (Operand.isSFPImm()) -+ return 2; -+ else if (Operand.isExpr()) -+ return 3; -+ else -+ return -1; -+ }; -+ -+ const auto InstInfo = BC.MII->get((*Cmp).getOpcode()); -+ unsigned NumDefs = InstInfo.getNumDefs(); -+ int32_t NumPrimeOperands = -+ MCPlus::getNumPrimeOperands(*Cmp) - NumDefs; -+ switch (NumPrimeOperands) { -+ case 6: { -+ int32_t RBType = getOperandType((*Cmp).getOperand(NumDefs)); -+ int32_t RAType = getOperandType((*Cmp).getOperand(NumDefs + 1)); -+ -+ if (RBType == 0 && RAType == 0) { -+ BFI->OperandRBType = RBType; -+ BFI->OperandRAType = RAType; -+ } else if (RBType == 0 && (RAType == 1 || RAType == 2)) { -+ RAType = getOperandType((*Cmp).getOperand(NumPrimeOperands - 1)); -+ -+ if (RAType != 1 && RAType != 2) { -+ RAType = -1; -+ } - -- //======================================================================== -+ BFI->OperandRBType = RBType; -+ BFI->OperandRAType = RAType; -+ } else { -+ BFI->OperandRAType = -1; -+ BFI->OperandRBType = -1; -+ } -+ break; -+ } -+ case 2: -+ BFI->OperandRBType = getOperandType((*Cmp).getOperand(NumDefs)); -+ BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs + 1)); -+ break; -+ case 3: -+ BFI->OperandRBType = getOperandType((*Cmp).getOperand(NumDefs)); -+ BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs + 2)); -+ break; -+ case 1: -+ BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs)); -+ break; -+ default: -+ BFI->OperandRAType = -1; -+ BFI->OperandRBType = -1; -+ break; -+ } - -- // this->BranchesInfoSet.push_back(std::move(BFI)); -+ } else { -+ Index -= 1; -+ for (int Idx = Index; Idx > -1; Idx--) { -+ auto Cmp = BB.begin() + Idx; -+ if (BC.MII->get((*Cmp).getOpcode()).isCompare()) { -+ // Holding the branch comparison opcode info. -+ BFI->CmpOpcode = (*Cmp).getOpcode(); -+ break; -+ } -+ } - } - } - } - --void FeatureMiner::dumpSuccessorFeatures(raw_ostream &Printer, -- BBIPtr &Successor) { -- int16_t BranchDominates = -- (Successor->BranchDominates.hasValue()) -- ? static_cast(*(Successor->BranchDominates)) -- : -1; -+void FeatureMiner::generateSuccessorFeatures(BBIPtr &Successor, BinaryBasicBlockFeature *BBF) { - -- int16_t BranchPostdominates = -- (Successor->BranchPostdominates.hasValue()) -- ? static_cast(*(Successor->BranchPostdominates)) -- : -1; - - int16_t LoopHeader = (Successor->LoopHeader.hasValue()) - ? static_cast(*(Successor->LoopHeader)) -@@ -690,378 +485,86 @@ void FeatureMiner::dumpSuccessorFeatures(raw_ostream &Printer, - ? static_cast(*(Successor->EndOpcode)) - : -1; - -- int64_t NumLoads = (Successor->NumLoads.hasValue()) -- ? static_cast(*(Successor->NumLoads)) -- : -1; -- -- int64_t NumStores = (Successor->NumStores.hasValue()) -- ? static_cast(*(Successor->NumStores)) -- : -1; -- - int64_t BasicBlockSize = - (Successor->BasicBlockSize.hasValue()) - ? static_cast(*(Successor->BasicBlockSize)) - : -1; - -- int64_t NumCalls = (Successor->NumCalls.hasValue()) -- ? static_cast(*(Successor->NumCalls)) -- : -1; -- -- int64_t NumCallsExit = (Successor->NumCallsExit.hasValue()) -- ? static_cast(*(Successor->NumCallsExit)) -- : -1; -- -- int64_t NumCallsInvoke = -- (Successor->NumCallsInvoke.hasValue()) -- ? static_cast(*(Successor->NumCallsInvoke)) -- : -1; -- -- int64_t NumIndirectCalls = -- (Successor->NumIndirectCalls.hasValue()) -- ? static_cast(*(Successor->NumIndirectCalls)) -- : -1; -+ BBF->setEndOpcodeVec(EndOpcode); -+ BBF->setLoopHeaderVec(LoopHeader); -+ BBF->setBackedgeVec(Backedge); -+ BBF->setExitVec(Exit); -+ BBF->setCallVec(Call); -+ BBF->setBasicBlockSizeVec(BasicBlockSize); - -- int64_t NumTailCalls = (Successor->NumTailCalls.hasValue()) -- ? static_cast(*(Successor->NumTailCalls)) -- : -1; -- -- Printer << "," << BranchDominates << "," << BranchPostdominates << "," -- << EndOpcode << "," << Successor->EndOpcodeStr << "," << LoopHeader -- << "," << Backedge << "," << Exit << "," << Call << "," -- << Successor->FromFunName << "," -- << Twine::utohexstr(Successor->FromBb) << "," << Successor->ToFunName -- << "," << Twine::utohexstr(Successor->ToBb) << "," << NumLoads << "," -- << NumStores << "," << BasicBlockSize << "," << NumCalls << "," -- << NumCallsExit << "," << NumIndirectCalls << "," << NumCallsInvoke -- << "," << NumTailCalls; - } - --void FeatureMiner::dumpFeatures(raw_ostream &Printer, uint64_t FunctionAddress, -- uint64_t FunctionFrequency) { -- -- for (auto const &BFI : BranchesInfoSet) { -- auto &FalseSuccessor = BFI->FalseSuccessor; -- auto &TrueSuccessor = BFI->TrueSuccessor; -- -- if (!FalseSuccessor && !TrueSuccessor) -- continue; -- -- int16_t ProcedureType = (BFI->ProcedureType.hasValue()) -- ? static_cast(*(BFI->ProcedureType)) -- : -1; -- -- int16_t Direction = -- (BFI->Direction.hasValue()) ? static_cast(*(BFI->Direction)) : -1; -- -- int16_t LoopHeader = (BFI->LoopHeader.hasValue()) -- ? static_cast(*(BFI->LoopHeader)) -- : -1; -- -- int32_t Opcode = -- (BFI->Opcode.hasValue()) ? static_cast(*(BFI->Opcode)) : -1; -- -- int32_t CmpOpcode = (BFI->CmpOpcode.hasValue()) -- ? static_cast(*(BFI->CmpOpcode)) -- : -1; -- -- int64_t Count = -- (BFI->Count.hasValue()) ? static_cast(*(BFI->Count)) : -1; -- -- int64_t MissPredicted = (BFI->MissPredicted.hasValue()) -- ? static_cast(*(BFI->MissPredicted)) -- : -1; -- -- int64_t FallthroughCount = -- (BFI->FallthroughCount.hasValue()) -- ? static_cast(*(BFI->FallthroughCount)) -- : -1; -- -- int64_t FallthroughMissPredicted = -- (BFI->FallthroughMissPredicted.hasValue()) -- ? static_cast(*(BFI->FallthroughMissPredicted)) -- : -1; -- -- int64_t NumOuterLoops = (BFI->NumOuterLoops.hasValue()) -- ? static_cast(*(BFI->NumOuterLoops)) -- : -1; -- int64_t TotalLoops = (BFI->TotalLoops.hasValue()) -- ? static_cast(*(BFI->TotalLoops)) -- : -1; -- int64_t MaximumLoopDepth = -- (BFI->MaximumLoopDepth.hasValue()) -- ? static_cast(*(BFI->MaximumLoopDepth)) -- : -1; -- int64_t LoopDepth = (BFI->LoopDepth.hasValue()) -- ? static_cast(*(BFI->LoopDepth)) -- : -1; -- int64_t LoopNumExitEdges = -- (BFI->LoopNumExitEdges.hasValue()) -- ? static_cast(*(BFI->LoopNumExitEdges)) -- : -1; -- int64_t LoopNumExitBlocks = -- (BFI->LoopNumExitBlocks.hasValue()) -- ? static_cast(*(BFI->LoopNumExitBlocks)) -- : -1; -- int64_t LoopNumExitingBlocks = -- (BFI->LoopNumExitingBlocks.hasValue()) -- ? static_cast(*(BFI->LoopNumExitingBlocks)) -- : -1; -- int64_t LoopNumLatches = (BFI->LoopNumLatches.hasValue()) -- ? static_cast(*(BFI->LoopNumLatches)) -- : -1; -- int64_t LoopNumBlocks = (BFI->LoopNumBlocks.hasValue()) -- ? static_cast(*(BFI->LoopNumBlocks)) -- : -1; -- int64_t LoopNumBackEdges = -- (BFI->LoopNumBackEdges.hasValue()) -- ? static_cast(*(BFI->LoopNumBackEdges)) -- : -1; -- -- int64_t LocalExitingBlock = -- (BFI->LocalExitingBlock.hasValue()) -- ? static_cast(*(BFI->LocalExitingBlock)) -- : -1; -- -- int64_t LocalLatchBlock = (BFI->LocalLatchBlock.hasValue()) -- ? static_cast(*(BFI->LocalLatchBlock)) -- : -1; -- -- int64_t LocalLoopHeader = (BFI->LocalLoopHeader.hasValue()) -- ? static_cast(*(BFI->LocalLoopHeader)) -- : -1; -- -- int64_t Call = -- (BFI->Call.hasValue()) ? static_cast(*(BFI->Call)) : -1; -- -- int64_t DeltaTaken = (BFI->DeltaTaken.hasValue()) -- ? static_cast(*(BFI->DeltaTaken)) -- : -1; -- -- int64_t NumLoads = (BFI->NumLoads.hasValue()) -- ? static_cast(*(BFI->NumLoads)) -- : -1; -- -- int64_t NumStores = (BFI->NumStores.hasValue()) -- ? static_cast(*(BFI->NumStores)) -- : -1; -- -- int64_t BasicBlockSize = (BFI->BasicBlockSize.hasValue()) -- ? static_cast(*(BFI->BasicBlockSize)) -- : -1; -- -- int64_t BranchOffset = (BFI->BranchOffset.hasValue()) -- ? static_cast(*(BFI->BranchOffset)): -1; -- -- int64_t NumBasicBlocks = (BFI->NumBasicBlocks.hasValue()) -- ? static_cast(*(BFI->NumBasicBlocks)) -- : -1; -- -- int64_t NumCalls = (BFI->NumCalls.hasValue()) -- ? static_cast(*(BFI->NumCalls)) -- : -1; -- -- int64_t NumSelfCalls = (BFI->NumSelfCalls.hasValue()) -- ? static_cast(*(BFI->NumSelfCalls)) -- : -1; -- -- int64_t NumCallsExit = (BFI->NumCallsExit.hasValue()) -- ? static_cast(*(BFI->NumCallsExit)) -- : -1; -- -- int64_t OperandRAType = (BFI->OperandRAType.hasValue()) -- ? static_cast(*(BFI->OperandRAType)) -- : -1; -- -- int64_t OperandRBType = (BFI->OperandRBType.hasValue()) -- ? static_cast(*(BFI->OperandRBType)) -- : -1; -- -- int64_t NumCallsInvoke = (BFI->NumCallsInvoke.hasValue()) -- ? static_cast(*(BFI->NumCallsInvoke)) -- : -1; -+void FeatureMiner::runOnFunctions(BinaryContext &BC){ - -- int64_t NumIndirectCalls = -- (BFI->NumIndirectCalls.hasValue()) -- ? static_cast(*(BFI->NumIndirectCalls)) -- : -1; -- -- int64_t NumTailCalls = (BFI->NumTailCalls.hasValue()) -- ? static_cast(*(BFI->NumTailCalls)) -- : -1; -- -- Printer << BFI->Simple << "," << Opcode << "," << BFI->OpcodeStr << "," -- << Direction << "," << CmpOpcode << "," << BFI->CmpOpcodeStr << "," -- << LoopHeader << "," << ProcedureType << "," << Count << "," -- << MissPredicted << "," << FallthroughCount << "," -- << FallthroughMissPredicted << "," << NumOuterLoops << "," -- << NumCallsExit << "," << TotalLoops << "," << MaximumLoopDepth -- << "," << LoopDepth << "," << LoopNumExitEdges << "," -- << LoopNumExitBlocks << "," << LoopNumExitingBlocks << "," -- << LoopNumLatches << "," << LoopNumBlocks << "," << LoopNumBackEdges -- << "," << LocalExitingBlock << "," << LocalLatchBlock << "," -- << LocalLoopHeader << "," << Call << "," << DeltaTaken << "," -- << NumLoads << "," << NumStores << "," << NumCalls << "," -- << OperandRAType << "," << OperandRBType << "," << BasicBlockSize -- << "," << NumBasicBlocks << "," << NumCallsInvoke << "," -- << NumIndirectCalls << "," << NumTailCalls << "," << NumSelfCalls; -- -- if (FalseSuccessor && TrueSuccessor) { -- dumpSuccessorFeatures(Printer, TrueSuccessor); -- dumpSuccessorFeatures(Printer, FalseSuccessor); -- } -- -- Printer << "," << Twine::utohexstr(FunctionAddress) << "," -- << FunctionFrequency << "\n"; -- } -- BranchesInfoSet.clear(); - } - --void FeatureMiner::runOnFunctions(BinaryContext &BC) { -- auto FileName = "features_new.csv"; -- outs() << "BOLT-INFO: Starting feature miner pass\n"; -- -- std::error_code EC; --// raw_fd_ostream Printer(FileName, EC, sys::fs::F_None); -- raw_fd_ostream Printer(FileName, EC, sys::fs::OF_None); -- -- if (EC) { -- errs() << "BOLT-WARNING: " << EC.message() << ", unable to open " -- << FileName << " for output.\n"; -- return; -- } -- -- auto FILENAME = "profile_data_regular.fdata"; --// raw_fd_ostream Printer2(FILENAME, EC, sys::fs::F_None); -- raw_fd_ostream Printer2(FILENAME, EC, sys::fs::OF_None); -- -- if (EC) { -- dbgs() << "BOLT-WARNING: " << EC.message() << ", unable to open" -- << " " << FILENAME << " for output.\n"; -- return; -- } -+void FeatureMiner::inferenceFeatures(BinaryFunction &Function){ - -- // CSV file header -- Printer << "FUN_TYPE,OPCODE,OPCODE_STR,DIRECTION,CMP_OPCODE,CMP_OPCODE_STR," -- "LOOP_HEADER,PROCEDURE_TYPE," -- "COUNT_TAKEN,MISS_TAKEN,COUNT_NOT_TAKEN,MISS_NOT_TAKEN," -- "NUM_OUTER_LOOPS,NUM_CALLS_EXIT,TOTAL_LOOPS,MAXIMUM_LOOP_DEPTH," -- "LOOP_DEPTH,LOOP_NUM_EXIT_EDGES,LOOP_NUM_EXIT_BLOCKS," -- "LOOP_NUM_EXITING_BLOCKS,LOOP_NUM_LATCHES,LOOP_NUM_BLOCKS," -- "LOOP_NUM_BAKCEDGES,LOCAL_EXITING_BLOCK,LOCAL_LATCH_BLOCK," -- "LOCAL_LOOP_HEADER,CALL,DELTA_TAKEN,NUM_LOADS,NUM_STORES," -- "NUM_CALLS,OPERAND_RA_TYPE,OPERAND_RB_TYPE,BASIC_BLOCK_SIZE," -- "NUM_BASIC_BLOCKS,NUM_CALLS_INVOKE,NUM_INDIRECT_CALLS," -- "NUM_TAIL_CALLS,NUM_SELF_CALLS,TS_DOMINATES,TS_POSTDOMINATES," -- "TS_END_OPCODE,TS_END_OPCODE_STR,TS_LOOP_HEADER,TS_BACKEDGE,TS_" -- "EXIT,TS_CALL," -- "TS_FROM_FUN_NAME,TS_FROM_BB,TS_TO_FUN_NAME,TS_TO_BB,TS_NUM_LOADS," -- "TS_NUM_STORES,TS_BASIC_BLOCK_SIZE,TS_NUM_CALLS,TS_NUM_CALLS_EXIT," -- "TS_NUM_INDIRECT_CALL,TS_NUM_CALLS_INVOKE,TS_NUM_TAIL_CALLS," -- "FS_DOMINATES,FS_POSTDOMINATES,FS_END_OPCODE,FS_END_OPCODE_STR,FS_" -- "LOOP_HEADER," -- "FS_BACKEDGE,FS_EXIT,FS_CALL,FS_FROM_FUN_NAME,FS_FROM_BB," -- "FS_TO_FUN_NAME,FS_TO_BB,FS_NUM_LOADS,FS_NUM_STORES," -- "FS_BASIC_BLOCK_SIZE,FS_NUM_CALLS,FS_NUM_CALLS_EXIT," -- "FS_NUM_INDIRECT_CALL,FS_NUM_CALLS_INVOKE,FS_NUM_TAIL_CALLS," -- "FUN_ENTRY_ADDRESS,FUN_ENTRY_FREQUENCY" -- ",FUN_UNIQUE_NUMBER,FUN_ONE_NAME,FUN_PRINT_NAME," -- "BRANCH_ADDRESS\n"; -- -- auto &BFs = BC.getBinaryFunctions(); - SBI = std::make_unique(); -- for (auto &BFI : BFs) { -- BinaryFunction &Function = BFI.second; - -- if (Function.empty() || (Function.hasValidProfile() && opts::IncludeValidProfile)) -- continue; -- -- if (!Function.isLoopFree()) { -- const BinaryLoopInfo &LoopsInfo = Function.getLoopInfo(); -- SBI->findLoopEdgesInfo(LoopsInfo); -- } -- extractFeatures(Function, BC, Printer); -- -- SBI->clear(); -+ if (Function.empty()) -+ return; - -- // dumpProfileData(Function, Printer2); -+ if (!Function.isLoopFree()) { -+ const BinaryLoopInfo &LoopsInfo = Function.getLoopInfo(); -+ SBI->findLoopEdgesInfo(LoopsInfo); - } - -- outs() << "BOLT-INFO: Dumping two-way conditional branches' features" -- << " at " << FileName << "\n"; --} -- --/*void FeatureMiner::dumpProfileData(BinaryFunction &Function, -- raw_ostream &Printer) { -- - BinaryContext &BC = Function.getBinaryContext(); -+ extractFeatures(Function, BC); - -- std::string FromFunName = Function.getPrintName(); -- for (auto &BB : Function) { -- auto LastInst = BB.getLastNonPseudoInstr(); -- -- for (auto &Inst : BB) { -- if (!BC.MIB->isCall(Inst) && !BC.MIB->isBranch(Inst) && -- LastInst != (&Inst)) -- continue; -- -- auto Offset = BC.MIB->tryGetAnnotationAs(Inst, "Offset"); -- -- if (!Offset) -- continue; -- -- uint64_t TakenFreqEdge = 0; -- auto FromBb = Offset.get(); -- std::string ToFunName; -- uint32_t ToBb; -- -- if (BC.MIB->isCall(Inst)) { -- auto *CalleeSymbol = BC.MIB->getTargetSymbol(Inst); -- if (!CalleeSymbol) -- continue; -- -- ToFunName = CalleeSymbol->getName(); -- ToBb = 0; -+ SBI->clear(); -+} - -- if (BC.MIB->getConditionalTailCall(Inst)) { -+void FeatureMiner::generateProfileFeatures(BinaryBasicBlock *BB, BinaryBasicBlockFeature *BBF) { -+ int32_t parentChildNum, parentCount, childParentNum, childCount; - -- if (BC.MIB->hasAnnotation(Inst, "CTCTakenCount")) { -- auto CountAnnt = -- BC.MIB->tryGetAnnotationAs(Inst, "CTCTakenCount"); -- if (CountAnnt) { -- TakenFreqEdge = (*CountAnnt); -- } -- } -- } else { -- if (BC.MIB->hasAnnotation(Inst, "Count")) { -- auto CountAnnt = -- BC.MIB->tryGetAnnotationAs(Inst, "Count"); -- if (CountAnnt) { -- TakenFreqEdge = (*CountAnnt); -- } -- } -- } -+ if (BB->ParentSet.size() == 0) { -+ parentChildNum = -1; -+ parentCount = -1; -+ } else { -+ parentChildNum = std::numeric_limits::max(); -+ parentCount = 0; -+ for (BinaryBasicBlock *parent: BB->ParentSet) { -+ if (parent->ChildrenSet.size() < parentChildNum) { -+ parentChildNum = parent->ChildrenSet.size(); -+ parentCount = parent->getExecutionCount(); -+ } else if (parent->ChildrenSet.size() == parentChildNum && parent->getExecutionCount() > parentCount) { -+ parentCount = parent->getExecutionCount(); -+ } -+ } -+ } - -- if (TakenFreqEdge > 0) -- Printer << "1 " << FromFunName << " " << Twine::utohexstr(FromBb) -- << " 1 " << ToFunName << " " << Twine::utohexstr(ToBb) << " " -- << 0 << " " << TakenFreqEdge << "\n"; -- } else { -- for (BinaryBasicBlock *SuccBB : BB.successors()) { -- TakenFreqEdge = BB.getBranchInfo(*SuccBB).Count; -- BinaryFunction *ToFun = SuccBB->getFunction(); -- ToFunName = ToFun->getPrintName(); -- ToBb = SuccBB->getInputOffset(); -- -- if (TakenFreqEdge > 0) -- Printer << "1 " << FromFunName << " " << Twine::utohexstr(FromBb) -- << " 1 " << ToFunName << " " << Twine::utohexstr(ToBb) -- << " " << 0 << " " << TakenFreqEdge << "\n"; -- } -+ if (BB->ChildrenSet.size() == 0) { -+ childParentNum = -1; -+ childCount = -1; -+ } else { -+ childParentNum = std::numeric_limits::max(); -+ childCount = 0; -+ for (BinaryBasicBlock *child: BB->ChildrenSet) { -+ if (child->ParentSet.size() < childParentNum) { -+ childParentNum = child->ParentSet.size(); -+ childCount = child->getExecutionCount(); -+ } else if (child->ParentSet.size() == childParentNum && child->getExecutionCount() > childCount) { -+ childCount = child->getExecutionCount(); - } - } - } -+ -+ int64_t parentCountCatch = parentCount > 0 ? 1 : 0; -+ int64_t childCountCatch = childCount > 0 ? 1 : 0; -+ -+ BBF->setParentChildNum(parentChildNum); -+ BBF->setParentCount(parentCountCatch); -+ BBF->setChildParentNum(childParentNum); -+ BBF->setChildCount(childCountCatch); -+ - } --*/ - - } // namespace bolt - } // namespace llvm -\ No newline at end of file -diff --git a/bolt/lib/Passes/StaticBranchInfo.cpp b/bolt/lib/Passes/StaticBranchInfo.cpp -index 13426b397..5a3e0ec9c 100644 ---- a/bolt/lib/Passes/StaticBranchInfo.cpp -+++ b/bolt/lib/Passes/StaticBranchInfo.cpp -@@ -15,10 +15,9 @@ - // - //===----------------------------------------------------------------------===// - --// #include "Passes/StaticBranchInfo.h" --// #include "BinaryBasicBlock.h" - #include "bolt/Passes/StaticBranchInfo.h" - #include "bolt/Core/BinaryBasicBlock.h" -+#include "bolt/Core/BinaryLoop.h" - - namespace llvm { - namespace bolt { -@@ -90,7 +89,6 @@ bool StaticBranchInfo::isExitEdge(const BinaryLoop::Edge &CFGEdge) const { - - bool StaticBranchInfo::isExitEdge(const BinaryBasicBlock *SrcBB, - const BinaryBasicBlock *DstBB) const { --// const BinaryLoop::Edge CFGEdge = std::make_pair(SrcBB, DstBB); - const BinaryLoop::Edge CFGEdge = std::make_pair(const_cast(SrcBB), const_cast(DstBB)); - return isExitEdge(CFGEdge); - } -@@ -159,4 +157,4 @@ void StaticBranchInfo::clear() { - } - - } // namespace bolt --} // namespace llvm -\ No newline at end of file -+} // namespace llvm -diff --git a/bolt/lib/Profile/CMakeLists.txt b/bolt/lib/Profile/CMakeLists.txt -index f4397e331..2273f4572 100644 ---- a/bolt/lib/Profile/CMakeLists.txt -+++ b/bolt/lib/Profile/CMakeLists.txt -@@ -11,6 +11,7 @@ add_llvm_library(LLVMBOLTProfile - Support - ) - -+ - target_link_libraries(LLVMBOLTProfile - PRIVATE - LLVMBOLTCore -diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp -index a51201d3b..2ae715049 100644 ---- a/bolt/lib/Profile/DataReader.cpp -+++ b/bolt/lib/Profile/DataReader.cpp -@@ -11,33 +11,48 @@ - // - //===----------------------------------------------------------------------===// - --#include "bolt/Profile/DataReader.h" - #include "bolt/Core/BinaryFunction.h" -+#include "bolt/Passes/FeatureMiner.h" - #include "bolt/Passes/MCF.h" -+#include "bolt/Profile/DataReader.h" - #include "bolt/Utils/Utils.h" - #include "llvm/Support/CommandLine.h" - #include "llvm/Support/Debug.h" - #include "llvm/Support/Errc.h" -+#include -+#include -+#include - #include -+#include - --#undef DEBUG_TYPE -+#undef DEBUG_TYPE - #define DEBUG_TYPE "bolt-prof" - - using namespace llvm; - - namespace opts { -- -+extern cl::opt BlockCorrection; - extern cl::OptionCategory BoltCategory; - extern llvm::cl::opt Verbosity; - --static cl::opt --DumpData("dump-data", -- cl::desc("dump parsed bolt data for debugging"), -- cl::Hidden, -- cl::cat(BoltCategory)); -+static cl::opt InputModelFilename("model-path", -+ cl::desc(""), -+ cl::Optional, -+ cl::cat(BoltCategory)); -+ -+static cl::opt DumpData("dump-data", -+ cl::desc("dump parsed bolt data for debugging"), -+ cl::Hidden, cl::cat(BoltCategory)); - - } // namespace opts - -+extern "C" { -+typedef void *(*CreateONNXRunnerFunc)(const char *); -+typedef void (*DeleteONNXRunnerFunc)(void *); -+typedef float (*RunONNXModelFunc)(void *, std::vector &, -+ std::vector &, std::vector &); -+} -+ - namespace llvm { - namespace bolt { - -@@ -322,6 +337,16 @@ Error DataReader::readProfilePreCFG(BinaryContext &BC) { - } - - Error DataReader::readProfile(BinaryContext &BC) { -+ -+ if (opts::BlockCorrection) { -+ if (opts::InputModelFilename.empty()) { -+ outs() << "llvm-bolt: expected -model-path= option.\n"; -+ exit(1); -+ } else { -+ DataReader::initializeONNXRunner(opts::InputModelFilename); -+ } -+ } -+ - for (auto &BFI : BC.getBinaryFunctions()) { - BinaryFunction &Function = BFI.second; - readProfile(Function); -@@ -333,6 +358,12 @@ Error DataReader::readProfile(BinaryContext &BC) { - ++NumUnused; - BC.setNumUnusedProfiledObjects(NumUnused); - -+ if (opts::BlockCorrection) { -+ uint64_t modified_total = DataReader::getModifiedBBTotal(); -+ outs() << "BOLT-INFO: total modified CFG BB count number is " -+ << modified_total << ".\n"; -+ } -+ - return Error::success(); - } - -@@ -564,11 +595,75 @@ float DataReader::evaluateProfileData(BinaryFunction &BF, - return MatchRatio; - } - -+void generateChildrenParentCount(BinaryBasicBlock *BB) { -+ typedef GraphTraits GraphT; -+ -+ for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB), -+ E = GraphT::child_end(BB); -+ CI != E; ++CI) { -+ typename GraphT::NodeRef Child = *CI; -+ BB->ChildrenSet.insert(Child); -+ Child->ParentSet.insert(BB); -+ } -+} -+ -+void generateChildrenParentCount(BinaryFunction &BF) { -+ for (BinaryBasicBlock &BB : BF) { -+ generateChildrenParentCount(&BB); -+ } -+} -+ -+uint64_t estimateBBCount(DataReader *dataReaderRef, BinaryBasicBlock *BB, -+ float threshold) { -+ uint64_t modified = 0; -+ std::vector input_string; -+ std::vector input_int64; -+ std::vector input_float; -+ -+ BinaryBasicBlockFeature BBF = BB->getFeatures(); -+ input_int64 = BBF.getInferenceFeatures(); -+ -+ if (input_int64.empty()) { -+ return 0; -+ } -+ -+ float model_pred = -+ dataReaderRef->ONNXInference(input_string, input_int64, input_float); -+ if (BB->getExecutionCount() == 0 && model_pred >= threshold) { -+ uint64_t min_neighbor_count = std::numeric_limits::max(); -+ for (BinaryBasicBlock *parent : BB->ParentSet) { -+ if (parent->getExecutionCount() > 0 && -+ parent->getExecutionCount() < min_neighbor_count) -+ min_neighbor_count = parent->getExecutionCount(); -+ } -+ for (BinaryBasicBlock *child : BB->ChildrenSet) { -+ if (child->getExecutionCount() > 0 && -+ child->getExecutionCount() < min_neighbor_count) -+ min_neighbor_count = child->getExecutionCount(); -+ } -+ if (min_neighbor_count != std::numeric_limits::max()) { -+ BB->setExecutionCount(min_neighbor_count); -+ modified = 1; -+ } -+ } -+ return modified; -+} -+ -+uint64_t estimateBBCount(DataReader *dataReaderRef, BinaryFunction &BF, -+ float threshold) { -+ uint64_t modified_total_func = 0; -+ const auto &Order = BF.dfs(); -+ for (auto *BBA : Order) { -+ auto &BB = *BBA; -+ modified_total_func += estimateBBCount(dataReaderRef, &BB, threshold); -+ } -+ return modified_total_func; -+} -+ - void DataReader::readSampleData(BinaryFunction &BF) { - FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames()); - if (!SampleDataOrErr) - return; -- - // Basic samples mode territory (without LBR info) - // First step is to assign BB execution count based on samples from perf - BF.ProfileMatchRatio = 1.0f; -@@ -609,6 +704,17 @@ void DataReader::readSampleData(BinaryFunction &BF) { - - BF.ExecutionCount = TotalEntryCount; - -+ if (opts::BlockCorrection) { -+ generateChildrenParentCount(BF); -+ std::unique_ptr FM = -+ std::make_unique(opts::BlockCorrection); -+ FM->inferenceFeatures(BF); -+ -+ float threshold = DataReader::getThreshold(); -+ uint64_t modified_total_func = estimateBBCount(this, BF, threshold); -+ DataReader::addModifiedBBTotal(modified_total_func); -+ } -+ - estimateEdgeCounts(BF); - } - -@@ -1324,7 +1430,7 @@ std::vector fetchMapEntriesRegex( - return AllData; - } - --} -+} // namespace - - bool DataReader::mayHaveProfileData(const BinaryFunction &Function) { - if (getBranchData(Function) || getMemData(Function)) -diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp -index fd21724c5..3bed5c16e 100644 ---- a/bolt/lib/Rewrite/RewriteInstance.cpp -+++ b/bolt/lib/Rewrite/RewriteInstance.cpp -@@ -16,6 +16,7 @@ - #include "bolt/Core/ParallelUtilities.h" - #include "bolt/Core/Relocation.h" - #include "bolt/Passes/CacheMetrics.h" -+#include "bolt/Passes/FeatureMiner.h" - #include "bolt/Passes/ReorderFunctions.h" - #include "bolt/Profile/BoltAddressTranslation.h" - #include "bolt/Profile/DataAggregator.h" -@@ -104,6 +105,12 @@ cl::opt DumpDotAll( - "enable '-print-loops' for color-coded blocks"), - cl::Hidden, cl::cat(BoltCategory)); - -+cl::opt BlockCorrection( -+ "block-correction", -+ cl::desc("capture features useful for ML model to inference the count on the binary basic block" -+ " and correct them on CFG."), -+ cl::ZeroOrMore, cl::cat(BoltOptCategory)); -+ - static cl::list - ForceFunctionNames("funcs", - cl::CommaSeparated, --- -2.33.0 - diff --git a/0005-BOLT-AArch64-Don-t-change-layout-in-PatchEntries.patch b/0005-BOLT-AArch64-Don-t-change-layout-in-PatchEntries.patch new file mode 100644 index 0000000000000000000000000000000000000000..eda8d214b70aac572d487ff8a70e5233955d4ba5 --- /dev/null +++ b/0005-BOLT-AArch64-Don-t-change-layout-in-PatchEntries.patch @@ -0,0 +1,130 @@ +From 28e7e71251dc4b79c29aa0d4904cb424f9081455 Mon Sep 17 00:00:00 2001 +From: rfwang07 +Date: Fri, 21 Jun 2024 11:23:42 +0800 +Subject: [PATCH] [BOLT][AArch64] Don't change layout in PatchEntries + +--- + bolt/lib/Passes/PatchEntries.cpp | 11 ++++++++ + bolt/test/AArch64/patch-entries.s | 36 ++++++++++++++++++++++++ + bolt/unittests/Core/BinaryContext.cpp | 40 +++++++++++++++++++++++++++ + 3 files changed, 87 insertions(+) + create mode 100644 bolt/test/AArch64/patch-entries.s + +diff --git a/bolt/lib/Passes/PatchEntries.cpp b/bolt/lib/Passes/PatchEntries.cpp +index 02a044d8b..ee7512d89 100644 +--- a/bolt/lib/Passes/PatchEntries.cpp ++++ b/bolt/lib/Passes/PatchEntries.cpp +@@ -98,6 +98,17 @@ void PatchEntries::runOnFunctions(BinaryContext &BC) { + }); + + if (!Success) { ++ // We can't change output layout for AArch64 due to LongJmp pass ++ if (BC.isAArch64()) { ++ if (opts::ForcePatch) { ++ errs() << "BOLT-ERROR: unable to patch entries in " << Function ++ << "\n"; ++ exit(1); ++ } ++ ++ continue; ++ } ++ + // If the original function entries cannot be patched, then we cannot + // safely emit new function body. + errs() << "BOLT-WARNING: failed to patch entries in " << Function +diff --git a/bolt/test/AArch64/patch-entries.s b/bolt/test/AArch64/patch-entries.s +new file mode 100644 +index 000000000..cf6f72a0b +--- /dev/null ++++ b/bolt/test/AArch64/patch-entries.s +@@ -0,0 +1,36 @@ ++# This test checks patch entries functionality ++ ++# REQUIRES: system-linux ++ ++# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \ ++# RUN: %s -o %t.o ++# RUN: %clang %cflags -pie %t.o -o %t.exe -nostdlib -Wl,-q ++# RUN: llvm-bolt %t.exe -o %t.bolt --use-old-text=0 --lite=0 --skip-funcs=_start ++# RUN: llvm-objdump -dz %t.bolt | FileCheck %s ++ ++# CHECK: : ++# CHECK-NEXT: adrp x16, 0x[[#%x,ADRP:]] ++# CHECK-NEXT: add x16, x16, #0x[[#%x,ADD:]] ++# CHECK-NEXT: br x16 ++ ++# CHECK: [[#ADRP + ADD]] : ++# CHECK-NEXT: [[#ADRP + ADD]]: {{.*}} ret ++ ++.text ++.balign 4 ++.global pathedEntries ++.type pathedEntries, %function ++pathedEntries: ++ .rept 32 ++ nop ++ .endr ++ ret ++.size pathedEntries, .-pathedEntries ++ ++.global _start ++.type _start, %function ++_start: ++ bl pathedEntries ++ .inst 0xdeadbeef ++ ret ++.size _start, .-_start +diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp +index 5a80cb4a2..7ac1c1435 100644 +--- a/bolt/unittests/Core/BinaryContext.cpp ++++ b/bolt/unittests/Core/BinaryContext.cpp +@@ -62,6 +62,46 @@ INSTANTIATE_TEST_SUITE_P(X86, BinaryContextTester, + INSTANTIATE_TEST_SUITE_P(AArch64, BinaryContextTester, + ::testing::Values(Triple::aarch64)); + ++TEST_P(BinaryContextTester, FlushPendingRelocCALL26) { ++ if (GetParam() != Triple::aarch64) ++ GTEST_SKIP(); ++ ++ // This test checks that encodeValueAArch64 used by flushPendingRelocations ++ // returns correctly encoded values for CALL26 relocation for both backward ++ // and forward branches. ++ // ++ // The offsets layout is: ++ // 4: func1 ++ // 8: bl func1 ++ // 12: bl func2 ++ // 16: func2 ++ ++ char Data[20] = {}; ++ BinarySection &BS = BC->registerOrUpdateSection( ++ ".text", ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC, ++ (uint8_t *)Data, sizeof(Data), 4); ++ MCSymbol *RelSymbol1 = BC->getOrCreateGlobalSymbol(4, "Func1"); ++ ASSERT_TRUE(RelSymbol1); ++ BS.addRelocation(8, RelSymbol1, ELF::R_AARCH64_CALL26, 0, 0, true); ++ MCSymbol *RelSymbol2 = BC->getOrCreateGlobalSymbol(16, "Func2"); ++ ASSERT_TRUE(RelSymbol2); ++ BS.addRelocation(12, RelSymbol2, ELF::R_AARCH64_CALL26, 0, 0, true); ++ ++ std::error_code EC; ++ SmallVector Vect(sizeof(Data)); ++ raw_svector_ostream OS(Vect); ++ ++ BS.flushPendingRelocations(OS, [&](const MCSymbol *S) { ++ return S == RelSymbol1 ? 4 : S == RelSymbol2 ? 16 : 0; ++ }); ++ ++ const uint8_t Func1Call[4] = {255, 255, 255, 151}; ++ const uint8_t Func2Call[4] = {1, 0, 0, 148}; ++ ++ EXPECT_FALSE(memcmp(Func1Call, &Vect[8], 4)) << "Wrong backward call value\n"; ++ EXPECT_FALSE(memcmp(Func2Call, &Vect[12], 4)) << "Wrong forward call value\n"; ++} ++ + #endif + + TEST_P(BinaryContextTester, BaseAddress) { +-- +2.39.2 (Apple Git-143) + diff --git a/0004-Added-open-source-code-related-to-feature-extracting.patch b/0006-AArch64-Add-CFG-block-count-correction-optimization.patch similarity index 34% rename from 0004-Added-open-source-code-related-to-feature-extracting.patch rename to 0006-AArch64-Add-CFG-block-count-correction-optimization.patch index 3bd91e15efe558e8eaf8879524df65ab2ee1a683..b90b76d9461c27694516afc455562b5889485b0b 100644 --- a/0004-Added-open-source-code-related-to-feature-extracting.patch +++ b/0006-AArch64-Add-CFG-block-count-correction-optimization.patch @@ -1,29 +1,425 @@ -From 96eff0ec88e75a49cc186476efd84370e6137b42 Mon Sep 17 00:00:00 2001 -From: h00502206 -Date: Tue, 4 Jun 2024 20:18:05 +0800 -Subject: [PATCH] Added open-source code related to feature extracting from - 'angelica-moreira: https://github.com/angelica-moreira/BOLT' on the basis of - llvm-bolt, and modified some code to pass the compilation. +From 25c9e9c7d4532f6e8962a25c5c7087bf3e3b8445 Mon Sep 17 00:00:00 2001 +From: rfwang07 +Date: Thu, 25 Jul 2024 14:45:53 +0800 +Subject: [PATCH] Add CFG block count correction optimization. --- - .../bolt/include/bolt/Passes/FeatureMiner.h | 178 +++ - .../include/bolt/Passes/StaticBranchInfo.h | 116 ++ - bolt/lib/Passes/CMakeLists.txt | 2 + - bolt/lib/Passes/FeatureMiner.cpp | 1067 +++++++++++++++++ - .../bolt/lib/Passes/StaticBranchInfo.cpp | 162 +++ - 5 files changed, 1525 insertions(+) + bolt/include/bolt/Core/BinaryBasicBlock.h | 59 +- + .../bolt/Core/BinaryBasicBlockFeature.h | 268 ++++++++ + bolt/include/bolt/Passes/FeatureMiner.h | 176 ++++++ + bolt/include/bolt/Passes/StaticBranchInfo.h | 108 ++++ + bolt/include/bolt/Profile/DataReader.h | 93 ++- + bolt/lib/Core/BinaryBasicBlockFeature.cpp | 21 + + bolt/lib/Core/CMakeLists.txt | 1 + + bolt/lib/Passes/CMakeLists.txt | 2 + + bolt/lib/Passes/FeatureMiner.cpp | 572 ++++++++++++++++++ + bolt/lib/Passes/StaticBranchInfo.cpp | 143 +++++ + bolt/lib/Profile/DataReader.cpp | 120 +++- + bolt/lib/Rewrite/RewriteInstance.cpp | 6 + + 12 files changed, 1557 insertions(+), 12 deletions(-) + create mode 100644 bolt/include/bolt/Core/BinaryBasicBlockFeature.h create mode 100644 bolt/include/bolt/Passes/FeatureMiner.h create mode 100644 bolt/include/bolt/Passes/StaticBranchInfo.h + create mode 100644 bolt/lib/Core/BinaryBasicBlockFeature.cpp create mode 100644 bolt/lib/Passes/FeatureMiner.cpp create mode 100644 bolt/lib/Passes/StaticBranchInfo.cpp +diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h +index 02be9c1d4..a39d38d6b 100644 +--- a/bolt/include/bolt/Core/BinaryBasicBlock.h ++++ b/bolt/include/bolt/Core/BinaryBasicBlock.h +@@ -15,6 +15,7 @@ + #ifndef BOLT_CORE_BINARY_BASIC_BLOCK_H + #define BOLT_CORE_BINARY_BASIC_BLOCK_H + ++#include "bolt/Core/BinaryBasicBlockFeature.h" + #include "bolt/Core/FunctionLayout.h" + #include "bolt/Core/MCPlus.h" + #include "llvm/ADT/GraphTraits.h" +@@ -25,6 +26,7 @@ + #include "llvm/Support/raw_ostream.h" + #include + #include ++#include + + namespace llvm { + class MCCodeEmitter; +@@ -147,6 +149,12 @@ private: + /// Last computed hash value. + mutable uint64_t Hash{0}; + ++ std::set ChildrenSet; ++ ++ std::set ParentSet; ++ ++ BinaryBasicBlockFeature BlockFeatures; ++ + private: + BinaryBasicBlock() = delete; + BinaryBasicBlock(const BinaryBasicBlock &) = delete; +@@ -385,11 +393,14 @@ public: + /// If the basic block ends with a conditional branch (possibly followed by + /// an unconditional branch) and thus has 2 successors, return a successor + /// corresponding to a jump condition which could be true or false. +- /// Return nullptr if the basic block does not have a conditional jump. ++ /// Return the only successor if it's followed by an unconditional branch. ++ /// Return nullptr otherwise. + BinaryBasicBlock *getConditionalSuccessor(bool Condition) { +- if (succ_size() != 2) +- return nullptr; +- return Successors[Condition == true ? 0 : 1]; ++ if (succ_size() == 2) ++ return Successors[Condition == true ? 0 : 1]; ++ if (succ_size() == 1) ++ return Successors[0]; ++ return nullptr; + } + + const BinaryBasicBlock *getConditionalSuccessor(bool Condition) const { +@@ -410,6 +421,13 @@ public: + return const_cast(this)->getFallthrough(); + } + ++ /// Return branch info corresponding to only branch. ++ const BinaryBranchInfo &getOnlyBranchInfo() const { ++ assert(BranchInfo.size() > 0 && ++ "could only be called for blocks with at least 1 successor"); ++ return BranchInfo[0]; ++ }; ++ + /// Return branch info corresponding to a taken branch. + const BinaryBranchInfo &getTakenBranchInfo() const { + assert(BranchInfo.size() == 2 && +@@ -818,6 +836,36 @@ public: + OutputAddressRange.second = Address; + } + ++ /// Sets features of this BB. ++ void setFeatures(BinaryBasicBlockFeature BBF) { ++ BlockFeatures = BBF; ++ } ++ ++ /// Gets numberic features of this BB. ++ BinaryBasicBlockFeature getFeatures() { ++ return BlockFeatures; ++ } ++ ++ /// Gets children sets of this BB. ++ std::set getChildrenSet() { ++ return ChildrenSet; ++ } ++ ++ /// Gets parent sets of this BB. ++ std::set getParentSet() { ++ return ParentSet; ++ } ++ ++ /// Inserts children sets of this BB. ++ void insertChildrenSet(BinaryBasicBlock *Node) { ++ ChildrenSet.insert(Node); ++ } ++ ++ /// Inserts parent sets of this BB. ++ void insertParentSet(BinaryBasicBlock *Node) { ++ ParentSet.insert(Node); ++ } ++ + /// Gets the memory address range of this BB in the input binary. + std::pair getInputAddressRange() const { + return InputRange; +@@ -991,7 +1039,8 @@ private: + #if defined(LLVM_ON_UNIX) + /// Keep the size of the BinaryBasicBlock within a reasonable size class + /// (jemalloc bucket) on Linux +-static_assert(sizeof(BinaryBasicBlock) <= 256); ++/// The size threshod is expanded from 256 to 2048 to contain the extra BB features ++static_assert(sizeof(BinaryBasicBlock) <= 2048, ""); + #endif + + bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS); +diff --git a/bolt/include/bolt/Core/BinaryBasicBlockFeature.h b/bolt/include/bolt/Core/BinaryBasicBlockFeature.h +new file mode 100644 +index 000000000..2b4809b1a +--- /dev/null ++++ b/bolt/include/bolt/Core/BinaryBasicBlockFeature.h +@@ -0,0 +1,268 @@ ++//===- bolt/Core/BinaryBasicBlockFeature.h - Low-level basic block -----*- C++ ++//-*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// Features of BinaryBasicBlock ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef BOLT_CORE_BINARY_BASIC_BLOCK_FEATURE_H ++#define BOLT_CORE_BINARY_BASIC_BLOCK_FEATURE_H ++ ++#include "bolt/Core/FunctionLayout.h" ++#include "bolt/Core/MCPlus.h" ++#include "llvm/ADT/GraphTraits.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCSymbol.h" ++#include "llvm/Support/ErrorOr.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include ++ ++namespace llvm { ++ ++namespace bolt { ++ ++class BinaryBasicBlockFeature { ++ ++public: ++ int32_t Opcode; ++ ++ int16_t Direction; ++ ++ int32_t CmpOpcode; ++ ++ int16_t LoopHeader; ++ ++ int16_t ProcedureType; ++ ++ int64_t Count; ++ ++ int64_t FallthroughCount; ++ ++ int64_t TotalLoops; ++ ++ int64_t LoopDepth; ++ ++ int64_t LoopNumBlocks; ++ ++ int64_t LocalExitingBlock; ++ ++ int64_t LocalLatchBlock; ++ ++ int64_t LocalLoopHeader; ++ ++ int64_t Call; ++ ++ int64_t DeltaTaken; ++ ++ int64_t NumLoads; ++ ++ int64_t NumCalls; ++ ++ int64_t OperandRAType; ++ ++ int64_t OperandRBType; ++ ++ int64_t BasicBlockSize; ++ ++ int64_t NumBasicBlocks; ++ ++ int64_t HasIndirectCalls; ++ ++ std::vector EndOpcode_vec; ++ ++ std::vector LoopHeader_vec; ++ ++ std::vector Backedge_vec; ++ ++ std::vector Exit_vec; ++ ++ std::vector Call_vec; ++ ++ std::vector BasicBlockSize_vec; ++ ++ std::vector InferenceFeatures; ++ ++ uint64_t FuncExec; ++ ++ int32_t ParentChildNum; ++ ++ int32_t ParentCount; ++ ++ int32_t ChildParentNum; ++ ++ int32_t ChildCount; ++ ++public: ++ void setOpcode(const int32_t &BlockOpcode) { Opcode = BlockOpcode; } ++ ++ void setDirection(const int16_t &BlockDirection) { ++ Direction = BlockDirection; ++ } ++ ++ void setCmpOpcode(const int32_t &BlockCmpOpcode) { ++ CmpOpcode = BlockCmpOpcode; ++ } ++ ++ void setLoopHeader(const int16_t &BlockLoopHeader) { ++ LoopHeader = BlockLoopHeader; ++ } ++ ++ void setProcedureType(const int16_t &BlockProcedureType) { ++ ProcedureType = BlockProcedureType; ++ } ++ ++ void setCount(const int64_t &BlockCount) { Count = BlockCount; } ++ ++ void setFallthroughCount(const int64_t &BlockFallthroughCount) { ++ FallthroughCount = BlockFallthroughCount; ++ } ++ ++ void setTotalLoops(const int64_t &BlockTotalLoops) { ++ TotalLoops = BlockTotalLoops; ++ } ++ ++ void setLoopDepth(const int64_t &BlockLoopDepth) { ++ LoopDepth = BlockLoopDepth; ++ } ++ ++ void setLoopNumBlocks(const int64_t &BlockLoopNumBlocks) { ++ LoopNumBlocks = BlockLoopNumBlocks; ++ } ++ ++ void setLocalExitingBlock(const int64_t &BlockLocalExitingBlock) { ++ LocalExitingBlock = BlockLocalExitingBlock; ++ } ++ ++ void setLocalLatchBlock(const int64_t &BlockLocalLatchBlock) { ++ LocalLatchBlock = BlockLocalLatchBlock; ++ } ++ ++ void setLocalLoopHeader(const int64_t &BlockLocalLoopHeader) { ++ LocalLoopHeader = BlockLocalLoopHeader; ++ } ++ ++ void setDeltaTaken(const int64_t &BlockDeltaTaken) { ++ DeltaTaken = BlockDeltaTaken; ++ } ++ ++ void setNumLoads(const int64_t &BlockNumLoads) { NumLoads = BlockNumLoads; } ++ ++ void setNumCalls(const int64_t &BlockNumCalls) { NumCalls = BlockNumCalls; } ++ ++ void setOperandRAType(const int64_t &BlockOperandRAType) { ++ OperandRAType = BlockOperandRAType; ++ } ++ ++ void setOperandRBType(const int64_t &BlockOperandRBType) { ++ OperandRBType = BlockOperandRBType; ++ } ++ ++ void setBasicBlockSize(const int64_t &BlockBasicBlockSize) { ++ BasicBlockSize = BlockBasicBlockSize; ++ } ++ ++ void setNumBasicBlocks(const int64_t &BlockNumBasicBlocks) { ++ NumBasicBlocks = BlockNumBasicBlocks; ++ } ++ ++ void setHasIndirectCalls(const int64_t &BlockHasIndirectCalls) { ++ HasIndirectCalls = BlockHasIndirectCalls; ++ } ++ ++ void setEndOpcodeVec(const int32_t &EndOpcode) { ++ EndOpcode_vec.push_back(EndOpcode); ++ } ++ ++ void setLoopHeaderVec(const int16_t &LoopHeader) { ++ LoopHeader_vec.push_back(LoopHeader); ++ } ++ ++ void setBackedgeVec(const int16_t &Backedge) { ++ Backedge_vec.push_back(Backedge); ++ } ++ ++ void setExitVec(const int16_t &Exit) { Exit_vec.push_back(Exit); } ++ ++ void setCallVec(const int16_t &Call) { Call_vec.push_back(Call); } ++ ++ void setBasicBlockSizeVec(const int64_t &BasicBlockSize) { ++ BasicBlockSize_vec.push_back(BasicBlockSize); ++ } ++ ++ void setFunExec(const uint64_t &BlockFuncExec) { FuncExec = BlockFuncExec; } ++ ++ void setParentChildNum(const int32_t &BlockParentChildNum) { ++ ParentChildNum = BlockParentChildNum; ++ } ++ ++ void setParentCount(const int32_t &BlockParentCount) { ++ ParentCount = BlockParentCount; ++ } ++ ++ void setChildParentNum(const int32_t &BlockChildParentNum) { ++ ChildParentNum = BlockChildParentNum; ++ } ++ ++ void setChildCount(const int32_t &BlockChildCount) { ++ ChildCount = BlockChildCount; ++ } ++ ++ void setInferenceFeatures() { ++ ++ if (Count == -1 || FallthroughCount == -1) { ++ return; ++ } ++ if (ParentChildNum == -1 && ParentCount == -1 && ChildParentNum == -1 && ++ ChildCount == -1) { ++ return; ++ } ++ ++ InferenceFeatures.push_back(static_cast(Direction)); ++ InferenceFeatures.push_back(static_cast(LoopHeader)); ++ InferenceFeatures.push_back(static_cast(ProcedureType)); ++ InferenceFeatures.push_back(static_cast(OperandRAType)); ++ InferenceFeatures.push_back(static_cast(OperandRBType)); ++ InferenceFeatures.push_back(static_cast(LoopHeader_vec[0])); ++ InferenceFeatures.push_back(static_cast(Backedge_vec[0])); ++ InferenceFeatures.push_back(static_cast(Exit_vec[0])); ++ InferenceFeatures.push_back(static_cast(LoopHeader_vec[1])); ++ InferenceFeatures.push_back(static_cast(Call_vec[0])); ++ InferenceFeatures.push_back(static_cast(LocalExitingBlock)); ++ InferenceFeatures.push_back(static_cast(HasIndirectCalls)); ++ InferenceFeatures.push_back(static_cast(LocalLatchBlock)); ++ InferenceFeatures.push_back(static_cast(LocalLoopHeader)); ++ InferenceFeatures.push_back(static_cast(Opcode)); ++ InferenceFeatures.push_back(static_cast(CmpOpcode)); ++ InferenceFeatures.push_back(static_cast(EndOpcode_vec[0])); ++ InferenceFeatures.push_back(static_cast(EndOpcode_vec[1])); ++ InferenceFeatures.push_back(static_cast(FuncExec)); ++ InferenceFeatures.push_back(static_cast(NumBasicBlocks)); ++ InferenceFeatures.push_back(static_cast(BasicBlockSize)); ++ InferenceFeatures.push_back(static_cast(BasicBlockSize_vec[0])); ++ InferenceFeatures.push_back(static_cast(BasicBlockSize_vec[1])); ++ InferenceFeatures.push_back(static_cast(LoopNumBlocks)); ++ InferenceFeatures.push_back(static_cast(NumLoads)); ++ InferenceFeatures.push_back(static_cast(NumCalls)); ++ InferenceFeatures.push_back(static_cast(TotalLoops)); ++ InferenceFeatures.push_back(static_cast(DeltaTaken)); ++ InferenceFeatures.push_back(static_cast(LoopDepth)); ++ InferenceFeatures.push_back(static_cast(ParentChildNum)); ++ InferenceFeatures.push_back(static_cast(ParentCount)); ++ InferenceFeatures.push_back(static_cast(ChildParentNum)); ++ InferenceFeatures.push_back(static_cast(ChildCount)); ++ } ++ ++ std::vector getInferenceFeatures() { return InferenceFeatures; } ++}; ++} // namespace bolt ++} // namespace llvm ++ ++#endif +\ No newline at end of file diff --git a/bolt/include/bolt/Passes/FeatureMiner.h b/bolt/include/bolt/Passes/FeatureMiner.h new file mode 100644 -index 000000000..916e5515d +index 000000000..6170aa62d --- /dev/null +++ b/bolt/include/bolt/Passes/FeatureMiner.h -@@ -0,0 +1,178 @@ -+//===--- Passes/FeatureMiner.h ---------------------------------------------===// +@@ -0,0 +1,176 @@ ++//===--- Passes/FeatureMiner.h ++//---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// @@ -39,25 +435,18 @@ index 000000000..916e5515d +#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ +#define LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ + -+// #include "BinaryContext.h" -+// #include "BinaryFunction.h" -+// #include "BinaryLoop.h" -+// #include "DominatorAnalysis.h" -+// #include "Passes/BinaryPasses.h" -+// #include "Passes/StaticBranchInfo.h" +#include "bolt/Core/BinaryData.h" +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Core/BinaryLoop.h" -+#include "bolt/Passes/DominatorAnalysis.h" +#include "bolt/Passes/BinaryPasses.h" ++#include "bolt/Passes/DominatorAnalysis.h" +#include "bolt/Passes/StaticBranchInfo.h" -+ +#include "llvm/ADT/DenseMap.h" -+#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" +#include ++#include +#include +#include +#include @@ -68,32 +457,31 @@ index 000000000..916e5515d +class FeatureMiner : public BinaryFunctionPass { +private: + std::unique_ptr SBI; -+ + /// BasicBlockInfo - This structure holds feature information about the target + /// BasicBlock of either the taken or the fallthrough paths of a given branch. + struct BasicBlockInfo { -+ Optional BranchDominates; // 1 - dominates, 0 - does not dominate -+ Optional BranchPostdominates; // 1 - postdominates, 0 - does not PD -+ Optional LoopHeader; // 1 - loop header, 0 - not a loop header -+ Optional Backedge; // 1 - loop back, 0 - not a loop back -+ Optional Exit; // 1 - loop exit, 0 - not a loop exit -+ Optional Call; // 1 - program call, 0 - not a program call -+ Optional NumCalls; -+ Optional NumLoads; -+ Optional NumStores; -+ Optional EndOpcode; // 0 = NOTHING -+ StringRef EndOpcodeStr = "UNDEF"; -+ Optional BasicBlockSize; ++ std::optional BranchDominates; // 1 - dominates, 0 - does not dominate ++ std::optional ++ BranchPostdominates; // 1 - postdominates, 0 - does not PD ++ std::optional LoopHeader; // 1 - loop header, 0 - not a loop header ++ std::optional Backedge; // 1 - loop back, 0 - not a loop back ++ std::optional Exit; // 1 - loop exit, 0 - not a loop exit ++ std::optional Call; // 1 - program call, 0 - not a program call ++ std::optional NumCalls; ++ std::optional NumLoads; ++ std::optional NumStores; ++ std::optional EndOpcode; // 0 = NOTHING ++ std::string EndOpcodeStr = "UNDEF"; ++ std::optional BasicBlockSize; + std::string FromFunName = "UNDEF"; + uint32_t FromBb; + std::string ToFunName = "UNDEF"; + uint32_t ToBb; + -+ -+ Optional NumCallsExit; -+ Optional NumCallsInvoke; -+ Optional NumIndirectCalls; -+ Optional NumTailCalls; ++ std::optional NumCallsExit; ++ std::optional NumCallsInvoke; ++ std::optional NumIndirectCalls; ++ std::optional NumTailCalls; + }; + + typedef std::unique_ptr BBIPtr; @@ -101,60 +489,61 @@ index 000000000..916e5515d + /// BranchFeaturesInfo - This structure holds feature information about each + /// two-way branch from the program. + struct BranchFeaturesInfo { -+ StringRef OpcodeStr = "UNDEF"; -+ StringRef CmpOpcodeStr = "UNDEF"; ++ std::string OpcodeStr = "UNDEF"; ++ std::string CmpOpcodeStr = "UNDEF"; + bool Simple = 0; + -+ Optional Opcode; -+ Optional CmpOpcode; -+ Optional Count; -+ Optional MissPredicted; -+ Optional FallthroughCount; -+ Optional FallthroughMissPredicted; ++ std::optional Opcode; ++ std::optional CmpOpcode; ++ std::optional Count; ++ std::optional MissPredicted; ++ std::optional FallthroughCount; ++ std::optional FallthroughMissPredicted; + BBIPtr TrueSuccessor = std::make_unique(); + BBIPtr FalseSuccessor = std::make_unique(); -+ Optional ProcedureType; // 1 - Leaf, 0 - NonLeaf, 2 - CallSelf -+ Optional LoopHeader; // 1 — loop header, 0 - not a loop header -+ Optional Direction; // 1 - Forward Branch, 0 - Backward Branch -+ -+ Optional NumOuterLoops; -+ Optional TotalLoops; -+ Optional MaximumLoopDepth; -+ Optional LoopDepth; -+ Optional LoopNumExitEdges; -+ Optional LoopNumExitBlocks; -+ Optional LoopNumExitingBlocks; -+ Optional LoopNumLatches; -+ Optional LoopNumBlocks; -+ Optional LoopNumBackEdges; -+ Optional NumLoads; -+ Optional NumStores; -+ -+ Optional LocalExitingBlock; -+ Optional LocalLatchBlock; -+ Optional LocalLoopHeader; -+ Optional Call; -+ -+ Optional NumCalls; -+ Optional NumCallsExit; -+ Optional NumCallsInvoke; -+ Optional NumIndirectCalls; -+ Optional NumTailCalls; -+ Optional NumSelfCalls; -+ -+ Optional NumBasicBlocks; -+ -+ Optional DeltaTaken; -+ -+ Optional OperandRAType; -+ Optional OperandRBType; -+ -+ Optional BasicBlockSize; -+ -+ Optional BranchOffset; ++ std::optional ProcedureType; // 1 - Leaf, 0 - NonLeaf, 2 - CallSelf ++ std::optional LoopHeader; // 1 — loop header, 0 - not a loop header ++ std::optional Direction; // 1 - Forward Branch, 0 - Backward Branch ++ ++ std::optional NumOuterLoops; ++ std::optional TotalLoops; ++ std::optional MaximumLoopDepth; ++ std::optional LoopDepth; ++ std::optional LoopNumExitEdges; ++ std::optional LoopNumExitBlocks; ++ std::optional LoopNumExitingBlocks; ++ std::optional LoopNumLatches; ++ std::optional LoopNumBlocks; ++ std::optional LoopNumBackEdges; ++ std::optional NumLoads; ++ std::optional NumStores; ++ ++ std::optional LocalExitingBlock; ++ std::optional LocalLatchBlock; ++ std::optional LocalLoopHeader; ++ std::optional Call; ++ ++ std::optional NumCalls; ++ std::optional NumCallsExit; ++ std::optional NumCallsInvoke; ++ std::optional NumIndirectCalls; ++ std::optional NumTailCalls; ++ std::optional NumSelfCalls; ++ ++ std::optional NumBasicBlocks; ++ ++ std::optional DeltaTaken; ++ ++ std::optional OperandRAType; ++ std::optional OperandRBType; ++ ++ std::optional BasicBlockSize; ++ ++ std::optional BranchOffset; + }; + + typedef std::unique_ptr BFIPtr; ++ + std::vector BranchesInfoSet; + + /// getProcedureType - Determines which category the function falls into: @@ -163,25 +552,24 @@ index 000000000..916e5515d + + /// addSuccessorInfo - Discovers feature information for the target successor + /// basic block, and inserts it into the static branch info container. -+ void addSuccessorInfo(DominatorAnalysis &DA, -+ DominatorAnalysis &PDA, BFIPtr const &BFI, -+ BinaryFunction &Function, BinaryContext &BC, -+ MCInst &Inst, BinaryBasicBlock &BB, bool Succ); ++ void addSuccessorInfo(BFIPtr const &BFI, BinaryFunction &Function, ++ BinaryContext &BC, BinaryBasicBlock &BB, bool SuccType); + + /// extractFeatures - Extracts the feature information for each two-way branch + /// from the program. -+ void extractFeatures(BinaryFunction &Function, -+ BinaryContext &BC, -+ raw_ostream &Printer); ++ void extractFeatures(BinaryFunction &Function, BinaryContext &BC); + ++ void generateInstFeatures(BinaryContext &BC, BinaryBasicBlock &BB, ++ BFIPtr const &BFI, int Index); + /// dumpSuccessorFeatures - Dumps the feature information about the target + /// BasicBlock of either the taken or the fallthrough paths of a given branch. -+ void dumpSuccessorFeatures(raw_ostream &Printer, BBIPtr &Successor); ++ void generateSuccessorFeatures(BBIPtr &Successor, ++ BinaryBasicBlockFeature *BBF); + + /// dumpFeatures - Dumps the feature information about each two-way branch + /// from the program. + void dumpFeatures(raw_ostream &Printer, uint64_t FunctionAddress, -+ uint64_t FunctionFrequency); ++ uint64_t FunctionFrequency); + + /// dumpProfileData - Dumps a limited version of the inout profile data + /// that contains only profile for conditional branches, unconditional @@ -192,22 +580,26 @@ index 000000000..916e5515d + explicit FeatureMiner(const cl::opt &PrintPass) + : BinaryFunctionPass(PrintPass) {} + ++ std::ofstream trainPrinter; ++ + const char *getName() const override { return "feature-miner"; } + + void runOnFunctions(BinaryContext &BC) override; ++ void inferenceFeatures(BinaryFunction &Function); ++ void generateProfileFeatures(BinaryBasicBlock *BB, ++ BinaryBasicBlockFeature *BBF); +}; + +} // namespace bolt +} // namespace llvm + +#endif /* LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ */ -\ No newline at end of file diff --git a/bolt/include/bolt/Passes/StaticBranchInfo.h b/bolt/include/bolt/Passes/StaticBranchInfo.h new file mode 100644 -index 000000000..1713d3367 +index 000000000..8de8df793 --- /dev/null +++ b/bolt/include/bolt/Passes/StaticBranchInfo.h -@@ -0,0 +1,116 @@ +@@ -0,0 +1,108 @@ +//===------ Passes/StaticBranchInfo.h -------------------------------------===// +// +// The LLVM Compiler Infrastructure @@ -228,15 +620,10 @@ index 000000000..1713d3367 +#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ +#define LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ + -+// #include "BinaryContext.h" -+// #include "BinaryFunction.h" -+// #include "BinaryLoop.h" +#include "bolt/Core/BinaryContext.h" +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Core/BinaryLoop.h" -+ +#include "llvm/MC/MCSymbol.h" -+// add new include +#include + +namespace llvm { @@ -307,9 +694,6 @@ index 000000000..1713d3367 + /// hasStoreInst - Checks if the basic block has a store instruction. + bool hasStoreInst(const BinaryBasicBlock *BB) const; + -+ /// callToExit - Checks if a basic block invokes exit function. -+ bool callToExit(BinaryBasicBlock *BB, BinaryContext &BC) const; -+ + /// countBackEdges - Compute the number of BB's successor that are back edges. + unsigned countBackEdges(BinaryBasicBlock *BB) const; + @@ -324,20 +708,185 @@ index 000000000..1713d3367 +} // namespace llvm + +#endif /* LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ */ +diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h +index 916b4f7e2..bf732d47c 100644 +--- a/bolt/include/bolt/Profile/DataReader.h ++++ b/bolt/include/bolt/Profile/DataReader.h +@@ -22,6 +22,7 @@ + #include "llvm/Support/MemoryBuffer.h" + #include "llvm/Support/raw_ostream.h" + #include ++#include + #include + #include + +@@ -44,6 +45,15 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LBREntry &LBR) { + return OS; + } + ++extern "C" { ++typedef void *(*CreateONNXRunnerFunc)(const char *); ++typedef void (*DeleteONNXRunnerFunc)(void *); ++typedef std::vector (*RunONNXModelFunc)(void *, ++ const std::vector &, ++ const std::vector &, ++ const std::vector &, int); ++} ++ + struct Location { + bool IsSymbol; + StringRef Name; +@@ -263,7 +273,8 @@ struct FuncSampleData { + class DataReader : public ProfileReaderBase { + public: + explicit DataReader(StringRef Filename) +- : ProfileReaderBase(Filename), Diag(errs()) {} ++ : ProfileReaderBase(Filename), Diag(errs()), onnxRunner(nullptr), ++ libHandle(nullptr), handleOnnxRuntime(nullptr) {} + + StringRef getReaderName() const override { return "branch profile reader"; } + +@@ -282,7 +293,87 @@ public: + /// Return all event names used to collect this profile + StringSet<> getEventNames() const override { return EventNames; } + ++ ~DataReader() { ++ // delete onnxrunner; ++ if (onnxRunner && libHandle && handleOnnxRuntime) { ++ DeleteONNXRunnerFunc deleteONNXRunner = ++ (DeleteONNXRunnerFunc)dlsym(libHandle, "deleteONNXRunner"); ++ deleteONNXRunner(onnxRunner); ++ dlclose(libHandle); ++ dlclose(handleOnnxRuntime); ++ } ++ } ++ ++ /// Initialize the onnxruntime model. ++ void initializeONNXRunner(const std::string &modelPath) { ++ if (!onnxRunner && !libHandle && !handleOnnxRuntime) { ++ handleOnnxRuntime = ++ dlopen("libonnxruntime.so", RTLD_LAZY | RTLD_GLOBAL); ++ if (handleOnnxRuntime == nullptr) { ++ outs() << "error: llvm-bolt failed during loading onnxruntime.so.\n"; ++ exit(1); ++ } ++ libHandle = dlopen("libONNXRunner.so", RTLD_LAZY); ++ if (libHandle == nullptr) { ++ outs() << "error: llvm-bolt failed during loading libONNXRunner.so.\n"; ++ exit(1); ++ } ++ CreateONNXRunnerFunc createONNXRunner = ++ (CreateONNXRunnerFunc)dlsym(libHandle, "createONNXRunner"); ++ onnxRunner = createONNXRunner(modelPath.c_str()); ++ } ++ } ++ ++ /// Inference step for predicting the BB counts based on the BB features. ++ float ONNXInference(const std::vector &input_string, ++ const std::vector &input_int64, ++ const std::vector &input_float, int batch_size = 1) { ++ if (onnxRunner && libHandle) { ++ RunONNXModelFunc runONNXModel = ++ (RunONNXModelFunc)dlsym(libHandle, "runONNXModel"); ++ std::vector model_preds = runONNXModel( ++ onnxRunner, input_string, input_int64, input_float, batch_size); ++ if (model_preds.size() <= 0) { ++ outs() << "error: llvm-bolt model prediction result cannot be empty.\n"; ++ exit(1); ++ } ++ float pred = model_preds[0]; ++ return pred; ++ } ++ return -1.0; ++ } ++ ++ /// Return the annotating threshold for the model prediction. ++ void setThreshold(float annotate_threshold) { ++ threshold = annotate_threshold; ++ } ++ + protected: ++ /// The onnxruntime model pointer read from the input model path. ++ void *onnxRunner; ++ ++ /// The library handle of the ai4compiler framwork. ++ void *libHandle; ++ ++ /// The library handle of the onnxruntime. ++ void *handleOnnxRuntime; ++ ++ /// The annotating threshold for the model prediction. ++ float threshold; ++ ++ /// Return the annotating threshold for the model prediction. ++ float getThreshold() const { return threshold; } ++ ++ /// The counting value of the total modified BB-count number. ++ uint64_t modified_BB_total = 0; ++ ++ /// Add the total modified BB-count number by the BB modifiied number within ++ /// the funciton. ++ void addModifiedBBTotal(uint64_t &value) { modified_BB_total += value; } ++ ++ /// Return the counting value of the total modified BB-count number. ++ uint64_t getModifiedBBTotal() const { return modified_BB_total; } ++ + /// Read profile information available for the function. + void readProfile(BinaryFunction &BF); + +diff --git a/bolt/lib/Core/BinaryBasicBlockFeature.cpp b/bolt/lib/Core/BinaryBasicBlockFeature.cpp +new file mode 100644 +index 000000000..e1a2a3dd8 +--- /dev/null ++++ b/bolt/lib/Core/BinaryBasicBlockFeature.cpp +@@ -0,0 +1,21 @@ ++//===- bolt/Core/BinaryBasicBlockFeature.cpp - Low-level basic block ++//-------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements the BinaryBasicBlock class. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "bolt/Core/BinaryBasicBlock.h" ++#include "bolt/Core/BinaryBasicBlockFeature.h" ++ ++#define DEBUG_TYPE "bolt" ++ ++namespace llvm { ++namespace bolt {} // namespace bolt ++} // namespace llvm \ No newline at end of file +diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt +index a4612fb93..f93147d39 100644 +--- a/bolt/lib/Core/CMakeLists.txt ++++ b/bolt/lib/Core/CMakeLists.txt +@@ -12,6 +12,7 @@ set(LLVM_LINK_COMPONENTS + + add_llvm_library(LLVMBOLTCore + BinaryBasicBlock.cpp ++ BinaryBasicBlockFeature.cpp + BinaryContext.cpp + BinaryData.cpp + BinaryEmitter.cpp diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt -index bb296263b..901ff614c 100644 +index b8bbe59a6..e9ccea17c 100644 --- a/bolt/lib/Passes/CMakeLists.txt +++ b/bolt/lib/Passes/CMakeLists.txt -@@ -12,6 +12,7 @@ add_llvm_library(LLVMBOLTPasses - DataflowAnalysis.cpp +@@ -13,6 +13,7 @@ add_llvm_library(LLVMBOLTPasses DataflowInfoManager.cpp - ExtTSPReorderAlgorithm.cpp -+ FeatureMiner.cpp FrameAnalysis.cpp FrameOptimizer.cpp ++ FeatureMiner.cpp + FixRelaxationPass.cpp + FixRISCVCallsPass.cpp HFSort.cpp -@@ -39,6 +40,7 @@ add_llvm_library(LLVMBOLTPasses +@@ -41,6 +42,7 @@ add_llvm_library(LLVMBOLTPasses StackAvailableExpressions.cpp StackPointerTracking.cpp StackReachingUses.cpp @@ -347,10 +896,10 @@ index bb296263b..901ff614c 100644 ThreeWayBranch.cpp diff --git a/bolt/lib/Passes/FeatureMiner.cpp b/bolt/lib/Passes/FeatureMiner.cpp new file mode 100644 -index 000000000..680222906 +index 000000000..d93aef648 --- /dev/null +++ b/bolt/lib/Passes/FeatureMiner.cpp -@@ -0,0 +1,1067 @@ +@@ -0,0 +1,572 @@ +//===--- Passes/FeatureMiner.cpp ------------------------------------------===// +// +// The LLVM Compiler Infrastructure @@ -364,15 +913,10 @@ index 000000000..680222906 +// https://dl.acm.org/doi/10.1145/239912.239923 +//===----------------------------------------------------------------------===// + -+// #include "Passes/FeatureMiner.h" -+// #include "Passes/DataflowInfoManager.h" -+// #include "llvm/Support/CommandLine.h" -+// #include "llvm/Support/Options.h" -+#include "bolt/Passes/FeatureMiner.h" +#include "bolt/Passes/DataflowInfoManager.h" ++#include "bolt/Passes/FeatureMiner.h" ++#include "bolt/Passes/StaticBranchInfo.h" +#include "llvm/Support/CommandLine.h" -+ -+// add new include +#include "llvm/Support/FileSystem.h" + +#undef DEBUG_TYPE @@ -382,24 +926,7 @@ index 000000000..680222906 +using namespace bolt; + +namespace opts { -+ -+extern cl::OptionCategory InferenceCategory; -+ -+cl::opt VespaUseDFS( -+ "vespa-dfs", -+ cl::desc("use DFS ordering when using -gen-features option"), -+ cl::init(false), -+ cl::ReallyHidden, -+ cl::ZeroOrMore, -+ cl::cat(InferenceCategory)); -+ -+cl::opt IncludeValidProfile( -+ "beetle-valid-profile-info", -+ cl::desc("include valid profile information."), -+ cl::init(false), -+ cl::ReallyHidden, -+ cl::ZeroOrMore, -+ cl::cat(InferenceCategory)); ++extern cl::opt BlockCorrection; + +} // namespace opts + @@ -428,50 +955,20 @@ index 000000000..680222906 + return ProcedureType; // leaf type +} + -+void FeatureMiner::addSuccessorInfo(DominatorAnalysis &DA, -+ DominatorAnalysis &PDA, -+ BFIPtr const &BFI, BinaryFunction &Function, -+ BinaryContext &BC, MCInst &Inst, -+ BinaryBasicBlock &BB, bool SuccType) { ++void FeatureMiner::addSuccessorInfo(BFIPtr const &BFI, BinaryFunction &Function, ++ BinaryContext &BC, BinaryBasicBlock &BB, ++ bool SuccType) { + + BinaryBasicBlock *Successor = BB.getConditionalSuccessor(SuccType); + + if (!Successor) + return; + -+ unsigned NumLoads{0}; -+ unsigned NumStores{0}; -+ unsigned NumCallsExit{0}; + unsigned NumCalls{0}; -+ unsigned NumCallsInvoke{0}; -+ unsigned NumTailCalls{0}; -+ unsigned NumIndirectCalls{0}; + + for (auto &Inst : BB) { -+ if (BC.MIB->isLoad(Inst)) { -+ ++NumLoads; -+ } else if (BC.MIB->isStore(Inst)) { -+ ++NumStores; -+ } else if (BC.MIB->isCall(Inst)) { ++ if (BC.MIB->isCall(Inst)) { + ++NumCalls; -+ -+ if (BC.MIB->isIndirectCall(Inst)) -+ ++NumIndirectCalls; -+ -+ if (BC.MIB->isInvoke(Inst)) -+ ++NumCallsInvoke; -+ -+ if (BC.MIB->isTailCall(Inst)) -+ ++NumTailCalls; -+ -+ if (const auto *CalleeSymbol = BC.MIB->getTargetSymbol(Inst)) { -+ StringRef CalleeName = CalleeSymbol->getName(); -+ if (CalleeName == "__cxa_throw@PLT" || -+ CalleeName == "_Unwind_Resume@PLT" || -+ CalleeName == "__cxa_rethrow@PLT" || CalleeName == "exit@PLT" || -+ CalleeName == "abort@PLT") -+ ++NumCallsExit; -+ } + } + } + @@ -491,117 +988,77 @@ index 000000000..680222906 + SuccBBInfo->Backedge = SBI->isBackEdge(&BB, Successor); + + MCInst *SuccInst = Successor->getTerminatorBefore(nullptr); ++ + // Store information about the branch type ending sucessor basic block + SuccBBInfo->EndOpcode = (SuccInst && BC.MIA->isBranch(*SuccInst)) + ? SuccInst->getOpcode() + : 0; // 0 = NOTHING -+ if (SuccBBInfo->EndOpcode != 0) -+ SuccBBInfo->EndOpcodeStr = BC.MII->getName(SuccInst->getOpcode()); -+ else -+ SuccBBInfo->EndOpcodeStr = "NOTHING"; + + // Check if the successor basic block contains + // a procedure call and store it. + SuccBBInfo->Call = (NumCalls > 0) ? 1 // Contains a call instruction + : 0; // Does not contain a call instruction + -+ SuccBBInfo->NumStores = NumStores; -+ SuccBBInfo->NumLoads = NumLoads; -+ SuccBBInfo->NumCallsExit = NumCallsExit; -+ SuccBBInfo->NumCalls = NumCalls; -+ -+ SuccBBInfo->NumCallsInvoke = NumCallsInvoke; -+ SuccBBInfo->NumIndirectCalls = NumIndirectCalls; -+ SuccBBInfo->NumTailCalls = NumTailCalls; -+ -+ auto InstSucc = Successor->getLastNonPseudoInstr(); -+ if (InstSucc) { -+ // Check if the source basic block dominates its -+ // target basic block and store it. -+ SuccBBInfo->BranchDominates = (DA.doesADominateB(Inst, *InstSucc) == true) -+ ? 1 // Dominates -+ : 0; // Does not dominate -+ -+ // Check if the target basic block postdominates -+ // the source basic block and store it. -+ SuccBBInfo->BranchPostdominates = -+ (PDA.doesADominateB(*InstSucc, Inst) == true) -+ ? 1 // Postdominates -+ : 0; // Does not postdominate -+ } -+ -+ /// The follwoing information is used as an identifier only for -+ /// the purpose of matching the inferred probabilities with the branches -+ /// in the binary. -+ SuccBBInfo->FromFunName = Function.getPrintName(); -+ SuccBBInfo->FromBb = BB.getInputOffset(); -+ BinaryFunction *ToFun = Successor->getFunction(); -+ SuccBBInfo->ToFunName = ToFun->getPrintName(); -+ SuccBBInfo->ToBb = Successor->getInputOffset(); -+ -+ auto Offset = BC.MIB->tryGetAnnotationAs(Inst, "Offset"); -+ if (Offset) { -+ uint32_t TargetOffset = Successor->getInputOffset(); -+ uint32_t BranchOffset = Offset.get(); -+ BFI->BranchOffset = BranchOffset; -+ if (BranchOffset != UINT32_MAX && TargetOffset != UINT32_MAX) { -+ int64_t Delta = TargetOffset - BranchOffset; -+ BFI->DeltaTaken = std::abs(Delta); -+ } -+ } ++ uint32_t Offset = BB.getEndOffset(); + + if (SuccType) { + BFI->TrueSuccessor = std::move(SuccBBInfo); -+ + // Check if the taken branch is a forward -+ // or a backwards branch and store it. ++ // or a backwards branch and store it + BFI->Direction = (Function.isForwardBranch(&BB, Successor) == true) + ? 1 // Forward branch + : 0; // Backwards branch + -+ auto TakenBranchInfo = BB.getTakenBranchInfo(); -+ BFI->Count = TakenBranchInfo.Count; -+ BFI->MissPredicted = TakenBranchInfo.MispredictedCount; ++ auto OnlyBranchInfo = BB.getOnlyBranchInfo(); ++ BFI->Count = OnlyBranchInfo.Count; ++ ++ if (Offset) { ++ uint32_t TargetOffset = Successor->getInputOffset(); ++ uint32_t BranchOffset = Offset; ++ if (BranchOffset != UINT32_MAX && TargetOffset != UINT32_MAX) { ++ int64_t Delta = static_cast(TargetOffset) - ++ static_cast(BranchOffset); ++ BFI->DeltaTaken = std::abs(Delta); ++ } ++ } + } else { ++ if (BB.succ_size() == 2) { ++ auto FallthroughBranchInfo = BB.getFallthroughBranchInfo(); ++ BFI->FallthroughCount = FallthroughBranchInfo.Count; ++ } else { ++ auto OnlyBranchInfo = BB.getOnlyBranchInfo(); ++ BFI->FallthroughCount = OnlyBranchInfo.Count; ++ } + BFI->FalseSuccessor = std::move(SuccBBInfo); -+ -+ auto FallthroughBranchInfo = BB.getFallthroughBranchInfo(); -+ BFI->FallthroughCount = FallthroughBranchInfo.Count; -+ BFI->FallthroughMissPredicted = FallthroughBranchInfo.MispredictedCount; + } +} + -+void FeatureMiner::extractFeatures(BinaryFunction &Function, BinaryContext &BC, -+ raw_ostream &Printer) { ++void FeatureMiner::extractFeatures(BinaryFunction &Function, ++ BinaryContext &BC) { + int8_t ProcedureType = getProcedureType(Function, BC); -+// auto Info = DataflowInfoManager(BC, Function, nullptr, nullptr); + auto Info = DataflowInfoManager(Function, nullptr, nullptr); -+ auto &DA = Info.getDominatorAnalysis(); -+ auto &PDA = Info.getPostDominatorAnalysis(); + const BinaryLoopInfo &LoopsInfo = Function.getLoopInfo(); -+ bool Simple = Function.isSimple(); + -+// const auto &Order = opts::VespaUseDFS ? Function.dfs() : Function.getLayout(); ++ bool Simple = Function.isSimple(); + const auto &Order = Function.dfs(); ++ std::string Function_name = Function.getPrintName(); + + for (auto *BBA : Order) { + + auto &BB = *BBA; -+ unsigned NumOuterLoops{0}; ++ ++ BinaryBasicBlockFeature BBF = BB.getFeatures(); ++ + unsigned TotalLoops{0}; -+ unsigned MaximumLoopDepth{0}; + unsigned LoopDepth{0}; -+ unsigned LoopNumExitEdges{0}; -+ unsigned LoopNumExitBlocks{0}; -+ unsigned LoopNumExitingBlocks{0}; -+ unsigned LoopNumLatches{0}; + unsigned LoopNumBlocks{0}; -+ unsigned LoopNumBackEdges{0}; + + bool LocalExitingBlock{false}; + bool LocalLatchBlock{false}; + bool LocalLoopHeader{false}; + ++ generateProfileFeatures(&BB, &BBF); ++ + BinaryLoop *Loop = LoopsInfo.getLoopFor(&BB); + if (Loop) { + SmallVector ExitingBlocks; @@ -616,815 +1073,412 @@ index 000000000..680222906 + SmallVector Latches; + Loop->getLoopLatches(Latches); + -+ NumOuterLoops = LoopsInfo.OuterLoops; + TotalLoops = LoopsInfo.TotalLoops; -+ MaximumLoopDepth = LoopsInfo.MaximumDepth; + LoopDepth = Loop->getLoopDepth(); -+ LoopNumExitEdges = ExitEdges.size(); -+ LoopNumExitBlocks = ExitBlocks.size(); -+ LoopNumExitingBlocks = ExitingBlocks.size(); -+ LoopNumLatches = Latches.size(); + LoopNumBlocks = Loop->getNumBlocks(); -+ LoopNumBackEdges = Loop->getNumBackEdges(); -+ + LocalExitingBlock = Loop->isLoopExiting(&BB); + LocalLatchBlock = Loop->isLoopLatch(&BB); + LocalLoopHeader = ((Loop->getHeader() == (&BB)) ? 1 : 0); + } + + unsigned NumLoads{0}; -+ unsigned NumStores{0}; -+ unsigned NumCallsExit{0}; + unsigned NumCalls{0}; -+ unsigned NumCallsInvoke{0}; -+ unsigned NumTailCalls{0}; + unsigned NumIndirectCalls{0}; -+ unsigned NumSelfCalls{0}; + + for (auto &Inst : BB) { + if (BC.MIB->isLoad(Inst)) { + ++NumLoads; -+ } else if (BC.MIB->isStore(Inst)) { -+ ++NumStores; + } else if (BC.MIB->isCall(Inst)) { + ++NumCalls; -+ + if (BC.MIB->isIndirectCall(Inst)) + ++NumIndirectCalls; -+ -+ if (BC.MIB->isInvoke(Inst)) -+ ++NumCallsInvoke; -+ -+ if (BC.MIB->isTailCall(Inst)) -+ ++NumTailCalls; -+ -+ if (const auto *CalleeSymbol = BC.MIB->getTargetSymbol(Inst)) { -+ StringRef CalleeName = CalleeSymbol->getName(); -+ if (CalleeName == "__cxa_throw@PLT" || -+ CalleeName == "_Unwind_Resume@PLT" || -+ CalleeName == "__cxa_rethrow@PLT" || CalleeName == "exit@PLT" || -+ CalleeName == "abort@PLT") -+ ++NumCallsExit; -+ else if (CalleeName == Function.getPrintName()) { -+ ++NumSelfCalls; -+ } -+ } + } + } + + int Index = -2; + bool LoopHeader = SBI->isLoopHeader(&BB); ++ ++ BFIPtr BFI = std::make_unique(); ++ ++ BFI->TotalLoops = TotalLoops; ++ BFI->LoopDepth = LoopDepth; ++ BFI->LoopNumBlocks = LoopNumBlocks; ++ BFI->LocalExitingBlock = LocalExitingBlock; ++ BFI->LocalLatchBlock = LocalLatchBlock; ++ BFI->LocalLoopHeader = LocalLoopHeader; ++ BFI->NumCalls = NumCalls; ++ BFI->BasicBlockSize = BB.size(); ++ BFI->NumBasicBlocks = Function.size(); ++ ++ BFI->NumLoads = NumLoads; ++ BFI->NumIndirectCalls = NumIndirectCalls; ++ BFI->LoopHeader = LoopHeader; ++ BFI->ProcedureType = ProcedureType; ++ ++ // Adding taken successor info. ++ addSuccessorInfo(BFI, Function, BC, BB, true); ++ // Adding fall through successor info. ++ addSuccessorInfo(BFI, Function, BC, BB, false); ++ ++ MCInst ConditionalInst; ++ bool hasConditionalBranch = false; ++ MCInst UnconditionalInst; ++ bool hasUnconditionalBranch = false; ++ + for (auto &Inst : BB) { + ++Index; -+ -+ if (!BC.MIA->isConditionalBranch(Inst)) ++ if (!BC.MIA->isConditionalBranch(Inst) && ++ !BC.MIA->isUnconditionalBranch(Inst)) + continue; + -+ BFIPtr BFI = std::make_unique(); -+ -+ BFI->Simple = Simple; -+ BFI->NumOuterLoops = NumOuterLoops; -+ BFI->TotalLoops = TotalLoops; -+ BFI->MaximumLoopDepth = MaximumLoopDepth; -+ BFI->LoopDepth = LoopDepth; -+ BFI->LoopNumExitEdges = LoopNumExitEdges; -+ BFI->LoopNumExitBlocks = LoopNumExitBlocks; -+ BFI->LoopNumExitingBlocks = LoopNumExitingBlocks; -+ BFI->LoopNumLatches = LoopNumLatches; -+ BFI->LoopNumBlocks = LoopNumBlocks; -+ BFI->LoopNumBackEdges = LoopNumBackEdges; -+ -+ BFI->LocalExitingBlock = LocalExitingBlock; -+ BFI->LocalLatchBlock = LocalLatchBlock; -+ BFI->LocalLoopHeader = LocalLoopHeader; -+ -+ BFI->Call = ((NumCalls > 0) ? 1 : 0); -+ BFI->NumCalls = NumCalls; -+ -+ BFI->BasicBlockSize = BB.size(); -+ BFI->NumBasicBlocks = Function.size(); -+ BFI->NumSelfCalls = NumSelfCalls; -+ -+ BFI->NumLoads = NumLoads; -+ BFI->NumStores = NumStores; -+ BFI->NumCallsExit = NumCallsExit; -+ -+ BFI->NumCallsInvoke = NumCallsInvoke; -+ BFI->NumIndirectCalls = NumIndirectCalls; -+ BFI->NumTailCalls = NumTailCalls; -+ -+ // Check if branch's basic block is a loop header and store it. -+ BFI->LoopHeader = LoopHeader; -+ -+ // Adding taken successor info. -+ addSuccessorInfo(DA, PDA, BFI, Function, BC, Inst, BB, true); -+ // Adding fall through successor info. -+ addSuccessorInfo(DA, PDA, BFI, Function, BC, Inst, BB, false); -+ -+ // Holds the branch opcode info. -+ BFI->Opcode = Inst.getOpcode(); -+ BFI->OpcodeStr = BC.MII->getName(Inst.getOpcode()); -+ -+ // Holds the branch's procedure type. -+ BFI->ProcedureType = ProcedureType; -+ -+ BFI->CmpOpcode = 0; -+ if (Index > -1) { -+ auto Cmp = BB.begin() + Index; -+ -+ if (BC.MII->get((*Cmp).getOpcode()).isCompare()) { -+ // Holding the branch comparison opcode info. -+ BFI->CmpOpcode = (*Cmp).getOpcode(); -+ -+ BFI->CmpOpcodeStr = BC.MII->getName((*Cmp).getOpcode()); -+ -+ auto getOperandType = [&](const MCOperand &Operand) -> int32_t { -+ if (Operand.isReg()) -+ return 0; -+ else if (Operand.isImm()) -+ return 1; -+ // else if (Operand.isFPImm()) -+ else if (Operand.isSFPImm()) -+ return 2; -+ else if (Operand.isExpr()) -+ return 3; -+ else -+ return -1; -+ }; -+ -+ const auto InstInfo = BC.MII->get((*Cmp).getOpcode()); -+ unsigned NumDefs = InstInfo.getNumDefs(); -+ int32_t NumPrimeOperands = -+ MCPlus::getNumPrimeOperands(*Cmp) - NumDefs; -+ switch (NumPrimeOperands) { -+ case 6: { -+ int32_t RBType = getOperandType((*Cmp).getOperand(NumDefs)); -+ int32_t RAType = getOperandType((*Cmp).getOperand(NumDefs + 1)); -+ -+ if (RBType == 0 && RAType == 0) { -+ BFI->OperandRBType = RBType; -+ BFI->OperandRAType = RAType; -+ } else if (RBType == 0 && (RAType == 1 || RAType == 2)) { -+ RAType = getOperandType((*Cmp).getOperand(NumPrimeOperands - 1)); -+ -+ if (RAType != 1 && RAType != 2) { -+ RAType = -1; -+ } -+ -+ BFI->OperandRBType = RBType; -+ BFI->OperandRAType = RAType; -+ } else { -+ BFI->OperandRAType = -1; -+ BFI->OperandRBType = -1; -+ } -+ break; -+ } -+ case 2: -+ BFI->OperandRBType = getOperandType((*Cmp).getOperand(NumDefs)); -+ BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs + 1)); -+ break; -+ case 3: -+ BFI->OperandRBType = getOperandType((*Cmp).getOperand(NumDefs)); -+ BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs + 2)); -+ break; -+ case 1: -+ BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs)); -+ break; -+ default: -+ BFI->OperandRAType = -1; -+ BFI->OperandRBType = -1; -+ break; -+ } ++ generateInstFeatures(BC, BB, BFI, Index); + -+ } else { -+ Index -= 1; -+ for (int Idx = Index; Idx > -1; Idx--) { -+ auto Cmp = BB.begin() + Idx; -+ if (BC.MII->get((*Cmp).getOpcode()).isCompare()) { -+ // Holding the branch comparison opcode info. -+ BFI->CmpOpcode = (*Cmp).getOpcode(); -+ BFI->CmpOpcodeStr = BC.MII->getName((*Cmp).getOpcode()); -+ break; -+ } -+ } -+ } ++ if (BC.MIA->isConditionalBranch(Inst)) { ++ ConditionalInst = Inst; ++ hasConditionalBranch = true; + } + -+ //======================================================================== -+ -+ auto &FalseSuccessor = BFI->FalseSuccessor; -+ auto &TrueSuccessor = BFI->TrueSuccessor; -+ -+ if (!FalseSuccessor && !TrueSuccessor) -+ continue; -+ -+ int64_t BranchOffset = -+ (BFI->BranchOffset.hasValue()) -+ ? static_cast(*(BFI->BranchOffset)) -+ : -1; -+ if(BranchOffset == -1) -+ continue; -+ -+ int16_t ProcedureType = (BFI->ProcedureType.hasValue()) -+ ? static_cast(*(BFI->ProcedureType)) -+ : -1; -+ -+ int16_t Direction = (BFI->Direction.hasValue()) -+ ? static_cast(*(BFI->Direction)) -+ : -1; -+ -+ int16_t LoopHeader = (BFI->LoopHeader.hasValue()) -+ ? static_cast(*(BFI->LoopHeader)) -+ : -1; -+ -+ int32_t Opcode = -+ (BFI->Opcode.hasValue()) ? static_cast(*(BFI->Opcode)) : -1; -+ -+ int32_t CmpOpcode = (BFI->CmpOpcode.hasValue()) -+ ? static_cast(*(BFI->CmpOpcode)) -+ : -1; -+ -+ int64_t Count = -+ (BFI->Count.hasValue()) ? static_cast(*(BFI->Count)) : -1; -+ -+ int64_t MissPredicted = (BFI->MissPredicted.hasValue()) -+ ? static_cast(*(BFI->MissPredicted)) -+ : -1; -+ -+ int64_t FallthroughCount = -+ (BFI->FallthroughCount.hasValue()) -+ ? static_cast(*(BFI->FallthroughCount)) -+ : -1; -+ -+ int64_t FallthroughMissPredicted = -+ (BFI->FallthroughMissPredicted.hasValue()) -+ ? static_cast(*(BFI->FallthroughMissPredicted)) -+ : -1; -+ -+ int64_t NumOuterLoops = (BFI->NumOuterLoops.hasValue()) -+ ? static_cast(*(BFI->NumOuterLoops)) -+ : -1; -+ int64_t TotalLoops = (BFI->TotalLoops.hasValue()) -+ ? static_cast(*(BFI->TotalLoops)) -+ : -1; -+ int64_t MaximumLoopDepth = -+ (BFI->MaximumLoopDepth.hasValue()) -+ ? static_cast(*(BFI->MaximumLoopDepth)) -+ : -1; -+ int64_t LoopDepth = (BFI->LoopDepth.hasValue()) -+ ? static_cast(*(BFI->LoopDepth)) -+ : -1; -+ int64_t LoopNumExitEdges = -+ (BFI->LoopNumExitEdges.hasValue()) -+ ? static_cast(*(BFI->LoopNumExitEdges)) -+ : -1; -+ int64_t LoopNumExitBlocks = -+ (BFI->LoopNumExitBlocks.hasValue()) -+ ? static_cast(*(BFI->LoopNumExitBlocks)) -+ : -1; -+ int64_t LoopNumExitingBlocks = -+ (BFI->LoopNumExitingBlocks.hasValue()) -+ ? static_cast(*(BFI->LoopNumExitingBlocks)) -+ : -1; -+ int64_t LoopNumLatches = -+ (BFI->LoopNumLatches.hasValue()) -+ ? static_cast(*(BFI->LoopNumLatches)) -+ : -1; -+ int64_t LoopNumBlocks = (BFI->LoopNumBlocks.hasValue()) -+ ? static_cast(*(BFI->LoopNumBlocks)) -+ : -1; -+ int64_t LoopNumBackEdges = -+ (BFI->LoopNumBackEdges.hasValue()) -+ ? static_cast(*(BFI->LoopNumBackEdges)) -+ : -1; -+ -+ int64_t LocalExitingBlock = -+ (BFI->LocalExitingBlock.hasValue()) -+ ? static_cast(*(BFI->LocalExitingBlock)) -+ : -1; -+ -+ int64_t LocalLatchBlock = (BFI->LocalLatchBlock.hasValue()) -+ ? static_cast(*(BFI->LocalLatchBlock)) -+ : -1; -+ -+ int64_t LocalLoopHeader = (BFI->LocalLoopHeader.hasValue()) -+ ? static_cast(*(BFI->LocalLoopHeader)) -+ : -1; -+ -+ int64_t Call = -+ (BFI->Call.hasValue()) ? static_cast(*(BFI->Call)) : -1; -+ -+ int64_t DeltaTaken = (BFI->DeltaTaken.hasValue()) -+ ? static_cast(*(BFI->DeltaTaken)) -+ : -1; -+ -+ int64_t NumLoads = (BFI->NumLoads.hasValue()) -+ ? static_cast(*(BFI->NumLoads)) -+ : -1; -+ -+ int64_t NumStores = (BFI->NumStores.hasValue()) -+ ? static_cast(*(BFI->NumStores)) -+ : -1; -+ -+ int64_t BasicBlockSize = -+ (BFI->BasicBlockSize.hasValue()) -+ ? static_cast(*(BFI->BasicBlockSize)) -+ : -1; -+ -+ int64_t NumBasicBlocks = -+ (BFI->NumBasicBlocks.hasValue()) -+ ? static_cast(*(BFI->NumBasicBlocks)) -+ : -1; -+ -+ int64_t NumCalls = (BFI->NumCalls.hasValue()) -+ ? static_cast(*(BFI->NumCalls)) -+ : -1; -+ -+ int64_t NumSelfCalls = (BFI->NumSelfCalls.hasValue()) -+ ? static_cast(*(BFI->NumSelfCalls)) -+ : -1; -+ -+ int64_t NumCallsExit = (BFI->NumCallsExit.hasValue()) -+ ? static_cast(*(BFI->NumCallsExit)) -+ : -1; -+ -+ int64_t OperandRAType = (BFI->OperandRAType.hasValue()) -+ ? static_cast(*(BFI->OperandRAType)) -+ : -1; -+ -+ int64_t OperandRBType = (BFI->OperandRBType.hasValue()) -+ ? static_cast(*(BFI->OperandRBType)) -+ : -1; -+ -+ int64_t NumCallsInvoke = -+ (BFI->NumCallsInvoke.hasValue()) -+ ? static_cast(*(BFI->NumCallsInvoke)) -+ : -1; -+ -+ int64_t NumIndirectCalls = -+ (BFI->NumIndirectCalls.hasValue()) -+ ? static_cast(*(BFI->NumIndirectCalls)) -+ : -1; -+ -+ int64_t NumTailCalls = (BFI->NumTailCalls.hasValue()) -+ ? static_cast(*(BFI->NumTailCalls)) -+ : -1; -+ -+ Printer << BFI->Simple << "," << Opcode << "," << BFI->OpcodeStr << "," -+ << Direction << "," << CmpOpcode << "," << BFI->CmpOpcodeStr -+ << "," << LoopHeader << "," << ProcedureType << "," << Count -+ << "," << MissPredicted << "," << FallthroughCount << "," -+ << FallthroughMissPredicted << "," << NumOuterLoops << "," -+ << NumCallsExit << "," << TotalLoops << "," << MaximumLoopDepth -+ << "," << LoopDepth << "," << LoopNumExitEdges << "," -+ << LoopNumExitBlocks << "," << LoopNumExitingBlocks << "," -+ << LoopNumLatches << "," << LoopNumBlocks << "," -+ << LoopNumBackEdges << "," << LocalExitingBlock << "," -+ << LocalLatchBlock << "," << LocalLoopHeader << "," << Call << "," -+ << DeltaTaken << "," << NumLoads << "," << NumStores << "," -+ << NumCalls << "," << OperandRAType << "," << OperandRBType << "," -+ << BasicBlockSize << "," << NumBasicBlocks << "," -+ << NumCallsInvoke << "," << NumIndirectCalls << "," -+ << NumTailCalls << "," << NumSelfCalls; -+ -+ if (FalseSuccessor && TrueSuccessor) { -+ dumpSuccessorFeatures(Printer, TrueSuccessor); -+ dumpSuccessorFeatures(Printer, FalseSuccessor); -+ -+ FalseSuccessor.reset(); -+ TrueSuccessor.reset(); ++ if (BC.MIA->isUnconditionalBranch(Inst)) { ++ UnconditionalInst = Inst; ++ hasUnconditionalBranch = true; + } -+ BFI.reset(); -+ -+ std::string BranchOffsetStr = (BranchOffset == -1) ? "None" : Twine::utohexstr(BranchOffset).str(); -+ -+ uint64_t fun_exec = Function.getExecutionCount(); -+ fun_exec = (fun_exec != UINT64_MAX) ? fun_exec : 0; -+ Printer << "," << Twine::utohexstr(Function.getAddress()) << "," -+ << fun_exec << "," << Function.getFunctionNumber() << "," -+ << Function.getOneName() << "," << Function.getPrintName() -+ << "," << BranchOffsetStr -+ << "\n"; -+ -+ //======================================================================== -+ -+ // this->BranchesInfoSet.push_back(std::move(BFI)); + } -+ } -+} + -+void FeatureMiner::dumpSuccessorFeatures(raw_ostream &Printer, -+ BBIPtr &Successor) { -+ int16_t BranchDominates = -+ (Successor->BranchDominates.hasValue()) -+ ? static_cast(*(Successor->BranchDominates)) -+ : -1; -+ -+ int16_t BranchPostdominates = -+ (Successor->BranchPostdominates.hasValue()) -+ ? static_cast(*(Successor->BranchPostdominates)) -+ : -1; -+ -+ int16_t LoopHeader = (Successor->LoopHeader.hasValue()) -+ ? static_cast(*(Successor->LoopHeader)) -+ : -1; -+ -+ int16_t Backedge = (Successor->Backedge.hasValue()) -+ ? static_cast(*(Successor->Backedge)) -+ : -1; -+ -+ int16_t Exit = -+ (Successor->Exit.hasValue()) ? static_cast(*(Successor->Exit)) : -1; -+ -+ int16_t Call = -+ (Successor->Call.hasValue()) ? static_cast(*(Successor->Call)) : -1; -+ -+ int32_t EndOpcode = (Successor->EndOpcode.hasValue()) -+ ? static_cast(*(Successor->EndOpcode)) -+ : -1; -+ -+ int64_t NumLoads = (Successor->NumLoads.hasValue()) -+ ? static_cast(*(Successor->NumLoads)) -+ : -1; -+ -+ int64_t NumStores = (Successor->NumStores.hasValue()) -+ ? static_cast(*(Successor->NumStores)) -+ : -1; -+ -+ int64_t BasicBlockSize = -+ (Successor->BasicBlockSize.hasValue()) -+ ? static_cast(*(Successor->BasicBlockSize)) -+ : -1; -+ -+ int64_t NumCalls = (Successor->NumCalls.hasValue()) -+ ? static_cast(*(Successor->NumCalls)) -+ : -1; -+ -+ int64_t NumCallsExit = (Successor->NumCallsExit.hasValue()) -+ ? static_cast(*(Successor->NumCallsExit)) -+ : -1; -+ -+ int64_t NumCallsInvoke = -+ (Successor->NumCallsInvoke.hasValue()) -+ ? static_cast(*(Successor->NumCallsInvoke)) -+ : -1; -+ -+ int64_t NumIndirectCalls = -+ (Successor->NumIndirectCalls.hasValue()) -+ ? static_cast(*(Successor->NumIndirectCalls)) -+ : -1; ++ if (hasConditionalBranch) { ++ BFI->Opcode = ConditionalInst.getOpcode(); + -+ int64_t NumTailCalls = (Successor->NumTailCalls.hasValue()) -+ ? static_cast(*(Successor->NumTailCalls)) -+ : -1; -+ -+ Printer << "," << BranchDominates << "," << BranchPostdominates << "," -+ << EndOpcode << "," << Successor->EndOpcodeStr << "," << LoopHeader -+ << "," << Backedge << "," << Exit << "," << Call << "," -+ << Successor->FromFunName << "," -+ << Twine::utohexstr(Successor->FromBb) << "," << Successor->ToFunName -+ << "," << Twine::utohexstr(Successor->ToBb) << "," << NumLoads << "," -+ << NumStores << "," << BasicBlockSize << "," << NumCalls << "," -+ << NumCallsExit << "," << NumIndirectCalls << "," << NumCallsInvoke -+ << "," << NumTailCalls; -+} ++ } else { ++ if (hasUnconditionalBranch) { ++ BFI->Opcode = UnconditionalInst.getOpcode(); + -+void FeatureMiner::dumpFeatures(raw_ostream &Printer, uint64_t FunctionAddress, -+ uint64_t FunctionFrequency) { ++ } else { ++ auto Inst = BB.getLastNonPseudoInstr(); ++ BFI->Opcode = Inst->getOpcode(); ++ generateInstFeatures(BC, BB, BFI, Index); ++ } ++ } + -+ for (auto const &BFI : BranchesInfoSet) { + auto &FalseSuccessor = BFI->FalseSuccessor; + auto &TrueSuccessor = BFI->TrueSuccessor; + -+ if (!FalseSuccessor && !TrueSuccessor) -+ continue; -+ -+ int16_t ProcedureType = (BFI->ProcedureType.hasValue()) ++ int16_t ProcedureType = (BFI->ProcedureType.has_value()) + ? static_cast(*(BFI->ProcedureType)) + : -1; + -+ int16_t Direction = -+ (BFI->Direction.hasValue()) ? static_cast(*(BFI->Direction)) : -1; -+ -+ int16_t LoopHeader = (BFI->LoopHeader.hasValue()) -+ ? static_cast(*(BFI->LoopHeader)) -+ : -1; -+ -+ int32_t Opcode = -+ (BFI->Opcode.hasValue()) ? static_cast(*(BFI->Opcode)) : -1; -+ -+ int32_t CmpOpcode = (BFI->CmpOpcode.hasValue()) -+ ? static_cast(*(BFI->CmpOpcode)) -+ : -1; -+ + int64_t Count = -+ (BFI->Count.hasValue()) ? static_cast(*(BFI->Count)) : -1; -+ -+ int64_t MissPredicted = (BFI->MissPredicted.hasValue()) -+ ? static_cast(*(BFI->MissPredicted)) -+ : -1; ++ (BFI->Count.has_value()) ? static_cast(*(BFI->Count)) : -1; + + int64_t FallthroughCount = -+ (BFI->FallthroughCount.hasValue()) ++ (BFI->FallthroughCount.has_value()) + ? static_cast(*(BFI->FallthroughCount)) + : -1; + -+ int64_t FallthroughMissPredicted = -+ (BFI->FallthroughMissPredicted.hasValue()) -+ ? static_cast(*(BFI->FallthroughMissPredicted)) -+ : -1; ++ int16_t LoopHeaderValid = (BFI->LoopHeader.has_value()) ++ ? static_cast(*(BFI->LoopHeader)) ++ : -1; + -+ int64_t NumOuterLoops = (BFI->NumOuterLoops.hasValue()) -+ ? static_cast(*(BFI->NumOuterLoops)) -+ : -1; -+ int64_t TotalLoops = (BFI->TotalLoops.hasValue()) -+ ? static_cast(*(BFI->TotalLoops)) -+ : -1; -+ int64_t MaximumLoopDepth = -+ (BFI->MaximumLoopDepth.hasValue()) -+ ? static_cast(*(BFI->MaximumLoopDepth)) -+ : -1; -+ int64_t LoopDepth = (BFI->LoopDepth.hasValue()) -+ ? static_cast(*(BFI->LoopDepth)) -+ : -1; -+ int64_t LoopNumExitEdges = -+ (BFI->LoopNumExitEdges.hasValue()) -+ ? static_cast(*(BFI->LoopNumExitEdges)) -+ : -1; -+ int64_t LoopNumExitBlocks = -+ (BFI->LoopNumExitBlocks.hasValue()) -+ ? static_cast(*(BFI->LoopNumExitBlocks)) ++ int64_t TotalLoopsValid = (BFI->TotalLoops.has_value()) ++ ? static_cast(*(BFI->TotalLoops)) ++ : -1; ++ int64_t LoopDepthValid = (BFI->LoopDepth.has_value()) ++ ? static_cast(*(BFI->LoopDepth)) ++ : -1; ++ int64_t LoopNumBlocksValid = ++ (BFI->LoopNumBlocks.has_value()) ++ ? static_cast(*(BFI->LoopNumBlocks)) + : -1; -+ int64_t LoopNumExitingBlocks = -+ (BFI->LoopNumExitingBlocks.hasValue()) -+ ? static_cast(*(BFI->LoopNumExitingBlocks)) ++ int64_t LocalExitingBlockValid = ++ (BFI->LocalExitingBlock.has_value()) ++ ? static_cast(*(BFI->LocalExitingBlock)) + : -1; -+ int64_t LoopNumLatches = (BFI->LoopNumLatches.hasValue()) -+ ? static_cast(*(BFI->LoopNumLatches)) -+ : -1; -+ int64_t LoopNumBlocks = (BFI->LoopNumBlocks.hasValue()) -+ ? static_cast(*(BFI->LoopNumBlocks)) -+ : -1; -+ int64_t LoopNumBackEdges = -+ (BFI->LoopNumBackEdges.hasValue()) -+ ? static_cast(*(BFI->LoopNumBackEdges)) ++ ++ int64_t LocalLatchBlockValid = ++ (BFI->LocalLatchBlock.has_value()) ++ ? static_cast(*(BFI->LocalLatchBlock)) + : -1; + -+ int64_t LocalExitingBlock = -+ (BFI->LocalExitingBlock.hasValue()) -+ ? static_cast(*(BFI->LocalExitingBlock)) ++ int64_t LocalLoopHeaderValid = ++ (BFI->LocalLoopHeader.has_value()) ++ ? static_cast(*(BFI->LocalLoopHeader)) + : -1; + -+ int64_t LocalLatchBlock = (BFI->LocalLatchBlock.hasValue()) -+ ? static_cast(*(BFI->LocalLatchBlock)) -+ : -1; ++ int32_t CmpOpcode = (BFI->CmpOpcode.has_value()) ++ ? static_cast(*(BFI->CmpOpcode)) ++ : -1; + -+ int64_t LocalLoopHeader = (BFI->LocalLoopHeader.hasValue()) -+ ? static_cast(*(BFI->LocalLoopHeader)) -+ : -1; ++ int64_t OperandRAType = (BFI->OperandRAType.has_value()) ++ ? static_cast(*(BFI->OperandRAType)) ++ : 10; + -+ int64_t Call = -+ (BFI->Call.hasValue()) ? static_cast(*(BFI->Call)) : -1; ++ int64_t OperandRBType = (BFI->OperandRBType.has_value()) ++ ? static_cast(*(BFI->OperandRBType)) ++ : 10; ++ int16_t Direction = (BFI->Direction.has_value()) ++ ? static_cast(*(BFI->Direction)) ++ : -1; + -+ int64_t DeltaTaken = (BFI->DeltaTaken.hasValue()) ++ int64_t DeltaTaken = (BFI->DeltaTaken.has_value()) + ? static_cast(*(BFI->DeltaTaken)) + : -1; + -+ int64_t NumLoads = (BFI->NumLoads.hasValue()) -+ ? static_cast(*(BFI->NumLoads)) -+ : -1; -+ -+ int64_t NumStores = (BFI->NumStores.hasValue()) -+ ? static_cast(*(BFI->NumStores)) -+ : -1; ++ int64_t NumLoadsValid = (BFI->NumLoads.has_value()) ++ ? static_cast(*(BFI->NumLoads)) ++ : -1; + -+ int64_t BasicBlockSize = (BFI->BasicBlockSize.hasValue()) ++ int64_t BasicBlockSize = (BFI->BasicBlockSize.has_value()) + ? static_cast(*(BFI->BasicBlockSize)) + : -1; + -+ int64_t BranchOffset = (BFI->BranchOffset.hasValue()) -+ ? static_cast(*(BFI->BranchOffset)): -1; -+ -+ int64_t NumBasicBlocks = (BFI->NumBasicBlocks.hasValue()) ++ int64_t NumBasicBlocks = (BFI->NumBasicBlocks.has_value()) + ? static_cast(*(BFI->NumBasicBlocks)) + : -1; + -+ int64_t NumCalls = (BFI->NumCalls.hasValue()) -+ ? static_cast(*(BFI->NumCalls)) -+ : -1; -+ -+ int64_t NumSelfCalls = (BFI->NumSelfCalls.hasValue()) -+ ? static_cast(*(BFI->NumSelfCalls)) -+ : -1; -+ -+ int64_t NumCallsExit = (BFI->NumCallsExit.hasValue()) -+ ? static_cast(*(BFI->NumCallsExit)) -+ : -1; -+ -+ int64_t OperandRAType = (BFI->OperandRAType.hasValue()) -+ ? static_cast(*(BFI->OperandRAType)) -+ : -1; -+ -+ int64_t OperandRBType = (BFI->OperandRBType.hasValue()) -+ ? static_cast(*(BFI->OperandRBType)) ++ int64_t NumCallsValid = (BFI->NumCalls.has_value()) ++ ? static_cast(*(BFI->NumCalls)) + : -1; + -+ int64_t NumCallsInvoke = (BFI->NumCallsInvoke.hasValue()) -+ ? static_cast(*(BFI->NumCallsInvoke)) -+ : -1; -+ -+ int64_t NumIndirectCalls = -+ (BFI->NumIndirectCalls.hasValue()) ++ int64_t NumIndirectCallsValid = ++ (BFI->NumIndirectCalls.has_value()) + ? static_cast(*(BFI->NumIndirectCalls)) + : -1; + -+ int64_t NumTailCalls = (BFI->NumTailCalls.hasValue()) -+ ? static_cast(*(BFI->NumTailCalls)) -+ : -1; -+ -+ Printer << BFI->Simple << "," << Opcode << "," << BFI->OpcodeStr << "," -+ << Direction << "," << CmpOpcode << "," << BFI->CmpOpcodeStr << "," -+ << LoopHeader << "," << ProcedureType << "," << Count << "," -+ << MissPredicted << "," << FallthroughCount << "," -+ << FallthroughMissPredicted << "," << NumOuterLoops << "," -+ << NumCallsExit << "," << TotalLoops << "," << MaximumLoopDepth -+ << "," << LoopDepth << "," << LoopNumExitEdges << "," -+ << LoopNumExitBlocks << "," << LoopNumExitingBlocks << "," -+ << LoopNumLatches << "," << LoopNumBlocks << "," << LoopNumBackEdges -+ << "," << LocalExitingBlock << "," << LocalLatchBlock << "," -+ << LocalLoopHeader << "," << Call << "," << DeltaTaken << "," -+ << NumLoads << "," << NumStores << "," << NumCalls << "," -+ << OperandRAType << "," << OperandRBType << "," << BasicBlockSize -+ << "," << NumBasicBlocks << "," << NumCallsInvoke << "," -+ << NumIndirectCalls << "," << NumTailCalls << "," << NumSelfCalls; -+ -+ if (FalseSuccessor && TrueSuccessor) { -+ dumpSuccessorFeatures(Printer, TrueSuccessor); -+ dumpSuccessorFeatures(Printer, FalseSuccessor); -+ } ++ int64_t HasIndirectCalls = (NumIndirectCallsValid > 0) ? 1 : 0; + -+ Printer << "," << Twine::utohexstr(FunctionAddress) << "," -+ << FunctionFrequency << "\n"; ++ int32_t Opcode = ++ (BFI->Opcode.has_value()) ? static_cast(*(BFI->Opcode)) : -1; ++ ++ uint64_t fun_exec = Function.getExecutionCount(); ++ fun_exec = (fun_exec != UINT64_MAX) ? fun_exec : 0; ++ ++ BBF.setDirection(Direction); ++ BBF.setDeltaTaken(DeltaTaken); ++ BBF.setOpcode(Opcode); ++ BBF.setCmpOpcode(CmpOpcode); ++ BBF.setOperandRAType(OperandRAType); ++ BBF.setOperandRBType(OperandRBType); ++ BBF.setFunExec(fun_exec); ++ BBF.setTotalLoops(TotalLoopsValid); ++ BBF.setLoopDepth(LoopDepthValid); ++ BBF.setLoopNumBlocks(LoopNumBlocksValid); ++ BBF.setLocalExitingBlock(LocalExitingBlockValid); ++ BBF.setLocalLatchBlock(LocalLatchBlockValid); ++ BBF.setLocalLoopHeader(LocalLoopHeaderValid); ++ BBF.setNumCalls(NumCallsValid); ++ BBF.setBasicBlockSize(BasicBlockSize); ++ BBF.setNumBasicBlocks(NumBasicBlocks); ++ BBF.setNumLoads(NumLoadsValid); ++ BBF.setHasIndirectCalls(HasIndirectCalls); ++ BBF.setLoopHeader(LoopHeaderValid); ++ BBF.setProcedureType(ProcedureType); ++ BBF.setCount(Count); ++ BBF.setFallthroughCount(FallthroughCount); ++ ++ generateSuccessorFeatures(TrueSuccessor, &BBF); ++ generateSuccessorFeatures(FalseSuccessor, &BBF); ++ ++ FalseSuccessor.reset(); ++ TrueSuccessor.reset(); ++ ++ BBF.setInferenceFeatures(); ++ BB.setFeatures(BBF); ++ ++ BFI.reset(); + } -+ BranchesInfoSet.clear(); +} + -+void FeatureMiner::runOnFunctions(BinaryContext &BC) { -+ auto FileName = "features_new.csv"; -+ outs() << "BOLT-INFO: Starting feature miner pass\n"; ++void FeatureMiner::generateInstFeatures(BinaryContext &BC, BinaryBasicBlock &BB, ++ BFIPtr const &BFI, int Index) { ++ ++ // Holds the branch opcode info. ++ ++ BFI->CmpOpcode = 0; ++ if (Index > -1) { ++ auto Cmp = BB.begin() + Index; ++ if (BC.MII->get((*Cmp).getOpcode()).isCompare()) { ++ // Holding the branch comparison opcode info. ++ BFI->CmpOpcode = (*Cmp).getOpcode(); ++ auto getOperandType = [&](const MCOperand &Operand) -> int32_t { ++ if (Operand.isReg()) ++ return 0; ++ else if (Operand.isImm()) ++ return 1; ++ else if (Operand.isSFPImm()) ++ return 2; ++ else if (Operand.isExpr()) ++ return 3; ++ else ++ return -1; ++ }; ++ ++ const auto InstInfo = BC.MII->get((*Cmp).getOpcode()); ++ unsigned NumDefs = InstInfo.getNumDefs(); ++ int32_t NumPrimeOperands = MCPlus::getNumPrimeOperands(*Cmp) - NumDefs; ++ switch (NumPrimeOperands) { ++ case 6: { ++ int32_t RBType = getOperandType((*Cmp).getOperand(NumDefs)); ++ int32_t RAType = getOperandType((*Cmp).getOperand(NumDefs + 1)); ++ ++ if (RBType == 0 && RAType == 0) { ++ BFI->OperandRBType = RBType; ++ BFI->OperandRAType = RAType; ++ } else if (RBType == 0 && (RAType == 1 || RAType == 2)) { ++ RAType = getOperandType((*Cmp).getOperand(NumPrimeOperands - 1)); ++ ++ if (RAType != 1 && RAType != 2) { ++ RAType = -1; ++ } + -+ std::error_code EC; -+// raw_fd_ostream Printer(FileName, EC, sys::fs::F_None); -+ raw_fd_ostream Printer(FileName, EC, sys::fs::OF_None); ++ BFI->OperandRBType = RBType; ++ BFI->OperandRAType = RAType; ++ } else { ++ BFI->OperandRAType = -1; ++ BFI->OperandRBType = -1; ++ } ++ break; ++ } ++ case 2: ++ BFI->OperandRBType = getOperandType((*Cmp).getOperand(NumDefs)); ++ BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs + 1)); ++ break; ++ case 3: ++ BFI->OperandRBType = getOperandType((*Cmp).getOperand(NumDefs)); ++ BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs + 2)); ++ break; ++ case 1: ++ BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs)); ++ break; ++ default: ++ BFI->OperandRAType = -1; ++ BFI->OperandRBType = -1; ++ break; ++ } + -+ if (EC) { -+ errs() << "BOLT-WARNING: " << EC.message() << ", unable to open " -+ << FileName << " for output.\n"; -+ return; ++ } else { ++ Index -= 1; ++ for (int Idx = Index; Idx > -1; Idx--) { ++ auto Cmp = BB.begin() + Idx; ++ if (BC.MII->get((*Cmp).getOpcode()).isCompare()) { ++ // Holding the branch comparison opcode info. ++ BFI->CmpOpcode = (*Cmp).getOpcode(); ++ break; ++ } ++ } ++ } + } ++} + -+ auto FILENAME = "profile_data_regular.fdata"; -+// raw_fd_ostream Printer2(FILENAME, EC, sys::fs::F_None); -+ raw_fd_ostream Printer2(FILENAME, EC, sys::fs::OF_None); ++void FeatureMiner::generateSuccessorFeatures(BBIPtr &Successor, ++ BinaryBasicBlockFeature *BBF) { + -+ if (EC) { -+ dbgs() << "BOLT-WARNING: " << EC.message() << ", unable to open" -+ << " " << FILENAME << " for output.\n"; -+ return; -+ } ++ int16_t LoopHeader = (Successor->LoopHeader.has_value()) ++ ? static_cast(*(Successor->LoopHeader)) ++ : -1; + -+ // CSV file header -+ Printer << "FUN_TYPE,OPCODE,OPCODE_STR,DIRECTION,CMP_OPCODE,CMP_OPCODE_STR," -+ "LOOP_HEADER,PROCEDURE_TYPE," -+ "COUNT_TAKEN,MISS_TAKEN,COUNT_NOT_TAKEN,MISS_NOT_TAKEN," -+ "NUM_OUTER_LOOPS,NUM_CALLS_EXIT,TOTAL_LOOPS,MAXIMUM_LOOP_DEPTH," -+ "LOOP_DEPTH,LOOP_NUM_EXIT_EDGES,LOOP_NUM_EXIT_BLOCKS," -+ "LOOP_NUM_EXITING_BLOCKS,LOOP_NUM_LATCHES,LOOP_NUM_BLOCKS," -+ "LOOP_NUM_BAKCEDGES,LOCAL_EXITING_BLOCK,LOCAL_LATCH_BLOCK," -+ "LOCAL_LOOP_HEADER,CALL,DELTA_TAKEN,NUM_LOADS,NUM_STORES," -+ "NUM_CALLS,OPERAND_RA_TYPE,OPERAND_RB_TYPE,BASIC_BLOCK_SIZE," -+ "NUM_BASIC_BLOCKS,NUM_CALLS_INVOKE,NUM_INDIRECT_CALLS," -+ "NUM_TAIL_CALLS,NUM_SELF_CALLS,TS_DOMINATES,TS_POSTDOMINATES," -+ "TS_END_OPCODE,TS_END_OPCODE_STR,TS_LOOP_HEADER,TS_BACKEDGE,TS_" -+ "EXIT,TS_CALL," -+ "TS_FROM_FUN_NAME,TS_FROM_BB,TS_TO_FUN_NAME,TS_TO_BB,TS_NUM_LOADS," -+ "TS_NUM_STORES,TS_BASIC_BLOCK_SIZE,TS_NUM_CALLS,TS_NUM_CALLS_EXIT," -+ "TS_NUM_INDIRECT_CALL,TS_NUM_CALLS_INVOKE,TS_NUM_TAIL_CALLS," -+ "FS_DOMINATES,FS_POSTDOMINATES,FS_END_OPCODE,FS_END_OPCODE_STR,FS_" -+ "LOOP_HEADER," -+ "FS_BACKEDGE,FS_EXIT,FS_CALL,FS_FROM_FUN_NAME,FS_FROM_BB," -+ "FS_TO_FUN_NAME,FS_TO_BB,FS_NUM_LOADS,FS_NUM_STORES," -+ "FS_BASIC_BLOCK_SIZE,FS_NUM_CALLS,FS_NUM_CALLS_EXIT," -+ "FS_NUM_INDIRECT_CALL,FS_NUM_CALLS_INVOKE,FS_NUM_TAIL_CALLS," -+ "FUN_ENTRY_ADDRESS,FUN_ENTRY_FREQUENCY" -+ ",FUN_UNIQUE_NUMBER,FUN_ONE_NAME,FUN_PRINT_NAME," -+ "BRANCH_ADDRESS\n"; -+ -+ auto &BFs = BC.getBinaryFunctions(); -+ SBI = std::make_unique(); -+ for (auto &BFI : BFs) { -+ BinaryFunction &Function = BFI.second; ++ int16_t Backedge = (Successor->Backedge.has_value()) ++ ? static_cast(*(Successor->Backedge)) ++ : -1; + -+ if (Function.empty() || (Function.hasValidProfile() && opts::IncludeValidProfile)) -+ continue; ++ int16_t Exit = (Successor->Exit.has_value()) ++ ? static_cast(*(Successor->Exit)) ++ : -1; + -+ if (!Function.isLoopFree()) { -+ const BinaryLoopInfo &LoopsInfo = Function.getLoopInfo(); -+ SBI->findLoopEdgesInfo(LoopsInfo); -+ } -+ extractFeatures(Function, BC, Printer); ++ int16_t Call = (Successor->Call.has_value()) ++ ? static_cast(*(Successor->Call)) ++ : -1; + -+ SBI->clear(); ++ int32_t EndOpcode = (Successor->EndOpcode.has_value()) ++ ? static_cast(*(Successor->EndOpcode)) ++ : -1; + -+ // dumpProfileData(Function, Printer2); -+ } ++ int64_t BasicBlockSize = ++ (Successor->BasicBlockSize.has_value()) ++ ? static_cast(*(Successor->BasicBlockSize)) ++ : -1; + -+ outs() << "BOLT-INFO: Dumping two-way conditional branches' features" -+ << " at " << FileName << "\n"; ++ BBF->setEndOpcodeVec(EndOpcode); ++ BBF->setLoopHeaderVec(LoopHeader); ++ BBF->setBackedgeVec(Backedge); ++ BBF->setExitVec(Exit); ++ BBF->setCallVec(Call); ++ BBF->setBasicBlockSizeVec(BasicBlockSize); +} + -+/*void FeatureMiner::dumpProfileData(BinaryFunction &Function, -+ raw_ostream &Printer) { ++void FeatureMiner::runOnFunctions(BinaryContext &BC) {} + -+ BinaryContext &BC = Function.getBinaryContext(); ++void FeatureMiner::inferenceFeatures(BinaryFunction &Function) { + -+ std::string FromFunName = Function.getPrintName(); -+ for (auto &BB : Function) { -+ auto LastInst = BB.getLastNonPseudoInstr(); -+ -+ for (auto &Inst : BB) { -+ if (!BC.MIB->isCall(Inst) && !BC.MIB->isBranch(Inst) && -+ LastInst != (&Inst)) -+ continue; -+ -+ auto Offset = BC.MIB->tryGetAnnotationAs(Inst, "Offset"); ++ SBI = std::make_unique(); + -+ if (!Offset) -+ continue; ++ if (Function.empty()) ++ return; + -+ uint64_t TakenFreqEdge = 0; -+ auto FromBb = Offset.get(); -+ std::string ToFunName; -+ uint32_t ToBb; ++ if (!Function.isLoopFree()) { ++ const BinaryLoopInfo &LoopsInfo = Function.getLoopInfo(); ++ SBI->findLoopEdgesInfo(LoopsInfo); ++ } + -+ if (BC.MIB->isCall(Inst)) { -+ auto *CalleeSymbol = BC.MIB->getTargetSymbol(Inst); -+ if (!CalleeSymbol) -+ continue; ++ BinaryContext &BC = Function.getBinaryContext(); ++ extractFeatures(Function, BC); + -+ ToFunName = CalleeSymbol->getName(); -+ ToBb = 0; ++ SBI->clear(); ++} + -+ if (BC.MIB->getConditionalTailCall(Inst)) { ++void FeatureMiner::generateProfileFeatures(BinaryBasicBlock *BB, ++ BinaryBasicBlockFeature *BBF) { ++ int32_t parentChildNum, parentCount, childParentNum, childCount; + -+ if (BC.MIB->hasAnnotation(Inst, "CTCTakenCount")) { -+ auto CountAnnt = -+ BC.MIB->tryGetAnnotationAs(Inst, "CTCTakenCount"); -+ if (CountAnnt) { -+ TakenFreqEdge = (*CountAnnt); -+ } -+ } -+ } else { -+ if (BC.MIB->hasAnnotation(Inst, "Count")) { -+ auto CountAnnt = -+ BC.MIB->tryGetAnnotationAs(Inst, "Count"); -+ if (CountAnnt) { -+ TakenFreqEdge = (*CountAnnt); -+ } -+ } -+ } ++ if (BB->getParentSet().size() == 0) { ++ parentChildNum = -1; ++ parentCount = -1; ++ } else { ++ parentChildNum = std::numeric_limits::max(); ++ parentCount = 0; ++ for (BinaryBasicBlock *parent : BB->getParentSet()) { ++ if (parent->getChildrenSet().size() < parentChildNum) { ++ parentChildNum = parent->getChildrenSet().size(); ++ parentCount = parent->getExecutionCount(); ++ } else if (parent->getChildrenSet().size() == parentChildNum && ++ parent->getExecutionCount() > parentCount) { ++ parentCount = parent->getExecutionCount(); ++ } ++ } ++ } + -+ if (TakenFreqEdge > 0) -+ Printer << "1 " << FromFunName << " " << Twine::utohexstr(FromBb) -+ << " 1 " << ToFunName << " " << Twine::utohexstr(ToBb) << " " -+ << 0 << " " << TakenFreqEdge << "\n"; -+ } else { -+ for (BinaryBasicBlock *SuccBB : BB.successors()) { -+ TakenFreqEdge = BB.getBranchInfo(*SuccBB).Count; -+ BinaryFunction *ToFun = SuccBB->getFunction(); -+ ToFunName = ToFun->getPrintName(); -+ ToBb = SuccBB->getInputOffset(); -+ -+ if (TakenFreqEdge > 0) -+ Printer << "1 " << FromFunName << " " << Twine::utohexstr(FromBb) -+ << " 1 " << ToFunName << " " << Twine::utohexstr(ToBb) -+ << " " << 0 << " " << TakenFreqEdge << "\n"; -+ } ++ if (BB->getChildrenSet().size() == 0) { ++ childParentNum = -1; ++ childCount = -1; ++ } else { ++ childParentNum = std::numeric_limits::max(); ++ childCount = 0; ++ for (BinaryBasicBlock *child : BB->getChildrenSet()) { ++ if (child->getParentSet().size() < childParentNum) { ++ childParentNum = child->getParentSet().size(); ++ childCount = child->getExecutionCount(); ++ } else if (child->getParentSet().size() == childParentNum && ++ child->getExecutionCount() > childCount) { ++ childCount = child->getExecutionCount(); + } + } + } ++ ++ int64_t parentCountCatch = parentCount > 0 ? 1 : 0; ++ int64_t childCountCatch = childCount > 0 ? 1 : 0; ++ ++ BBF->setParentChildNum(parentChildNum); ++ BBF->setParentCount(parentCountCatch); ++ BBF->setChildParentNum(childParentNum); ++ BBF->setChildCount(childCountCatch); +} -+*/ + +} // namespace bolt +} // namespace llvm \ No newline at end of file diff --git a/bolt/lib/Passes/StaticBranchInfo.cpp b/bolt/lib/Passes/StaticBranchInfo.cpp new file mode 100644 -index 000000000..13426b397 +index 000000000..585dbcae2 --- /dev/null +++ b/bolt/lib/Passes/StaticBranchInfo.cpp -@@ -0,0 +1,162 @@ +@@ -0,0 +1,143 @@ +//===------ Passes/StaticBranchInfo.cpp -----------------------------------===// +// +// The LLVM Compiler Infrastructure @@ -1442,10 +1496,9 @@ index 000000000..13426b397 +// +//===----------------------------------------------------------------------===// + -+// #include "Passes/StaticBranchInfo.h" -+// #include "BinaryBasicBlock.h" -+#include "bolt/Passes/StaticBranchInfo.h" +#include "bolt/Core/BinaryBasicBlock.h" ++#include "bolt/Core/BinaryLoop.h" ++#include "bolt/Passes/StaticBranchInfo.h" + +namespace llvm { +namespace bolt { @@ -1517,8 +1570,9 @@ index 000000000..13426b397 + +bool StaticBranchInfo::isExitEdge(const BinaryBasicBlock *SrcBB, + const BinaryBasicBlock *DstBB) const { -+// const BinaryLoop::Edge CFGEdge = std::make_pair(SrcBB, DstBB); -+ const BinaryLoop::Edge CFGEdge = std::make_pair(const_cast(SrcBB), const_cast(DstBB)); ++ const BinaryLoop::Edge CFGEdge = ++ std::make_pair(const_cast(SrcBB), ++ const_cast(DstBB)); + return isExitEdge(CFGEdge); +} + @@ -1534,25 +1588,6 @@ index 000000000..13426b397 + return StoreSet.count(BB); +} + -+bool StaticBranchInfo::callToExit(BinaryBasicBlock *BB, -+ BinaryContext &BC) const { -+ auto &currBB = *BB; -+ for (auto &Inst : currBB) { -+ if (BC.MIB->isCall(Inst)) { -+ if (const auto *CalleeSymbol = BC.MIB->getTargetSymbol(Inst)) { -+ StringRef CalleeName = CalleeSymbol->getName(); -+ if (CalleeName == "__cxa_throw@PLT" || -+ CalleeName == "_Unwind_Resume@PLT" || -+ CalleeName == "__cxa_rethrow@PLT" || CalleeName == "exit@PLT" || -+ CalleeName == "abort@PLT") -+ return true; -+ } -+ } -+ } -+ -+ return false; -+} -+ +unsigned StaticBranchInfo::countBackEdges(BinaryBasicBlock *BB) const { + unsigned CountEdges = 0; + @@ -1587,7 +1622,199 @@ index 000000000..13426b397 + +} // namespace bolt +} // namespace llvm -\ No newline at end of file +diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp +index 0e12e8cb3..447b71fe7 100644 +--- a/bolt/lib/Profile/DataReader.cpp ++++ b/bolt/lib/Profile/DataReader.cpp +@@ -12,13 +12,16 @@ + //===----------------------------------------------------------------------===// + + #include "bolt/Profile/DataReader.h" ++#include "bolt/Passes/FeatureMiner.h" + #include "bolt/Core/BinaryFunction.h" + #include "bolt/Passes/MCF.h" + #include "bolt/Utils/Utils.h" + #include "llvm/Support/CommandLine.h" + #include "llvm/Support/Debug.h" + #include "llvm/Support/Errc.h" ++#include + #include ++#include + + #undef DEBUG_TYPE + #define DEBUG_TYPE "bolt-prof" +@@ -26,15 +29,23 @@ + using namespace llvm; + + namespace opts { +- ++extern cl::opt BlockCorrection; + extern cl::OptionCategory BoltCategory; + extern llvm::cl::opt Verbosity; + +-static cl::opt +-DumpData("dump-data", +- cl::desc("dump parsed bolt data for debugging"), +- cl::Hidden, +- cl::cat(BoltCategory)); ++static cl::opt InputModelFilename("model-path", ++ cl::desc(""), ++ cl::Optional, ++ cl::cat(BoltCategory)); ++ ++static cl::opt AnnotateThreshold( ++ "annotate-threshold", ++ cl::desc(""), ++ cl::init(0.85f), cl::Optional, cl::cat(BoltCategory)); ++ ++static cl::opt DumpData("dump-data", ++ cl::desc("dump parsed bolt data for debugging"), ++ cl::Hidden, cl::cat(BoltCategory)); + + } // namespace opts + +@@ -311,6 +322,17 @@ Error DataReader::readProfilePreCFG(BinaryContext &BC) { + } + + Error DataReader::readProfile(BinaryContext &BC) { ++ ++ if (opts::BlockCorrection) { ++ if (opts::InputModelFilename.empty()) { ++ outs() << "error: llvm-bolt expected -model-path= option.\n"; ++ exit(1); ++ } else { ++ DataReader::initializeONNXRunner(opts::InputModelFilename); ++ DataReader::setThreshold(opts::AnnotateThreshold); ++ } ++ } ++ + for (auto &BFI : BC.getBinaryFunctions()) { + BinaryFunction &Function = BFI.second; + readProfile(Function); +@@ -324,6 +346,12 @@ Error DataReader::readProfile(BinaryContext &BC) { + } + BC.setNumUnusedProfiledObjects(NumUnused); + ++ if (opts::BlockCorrection) { ++ uint64_t modified_total = DataReader::getModifiedBBTotal(); ++ outs() << "BOLT-INFO: total modified CFG BB count number is " ++ << modified_total << ".\n"; ++ } ++ + return Error::success(); + } + +@@ -555,6 +583,75 @@ float DataReader::evaluateProfileData(BinaryFunction &BF, + return MatchRatio; + } + ++void generateChildrenParentCount(BinaryBasicBlock *BB) { ++ typedef GraphTraits GraphT; ++ ++ for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB), ++ E = GraphT::child_end(BB); ++ CI != E; ++CI) { ++ typename GraphT::NodeRef Child = *CI; ++ BB->insertChildrenSet(Child); ++ Child->insertParentSet(BB); ++ } ++} ++ ++void generateChildrenParentCount(BinaryFunction &BF) { ++ for (BinaryBasicBlock &BB : BF) { ++ generateChildrenParentCount(&BB); ++ } ++} ++ ++uint64_t estimateBBCount(DataReader *dataReaderRef, BinaryBasicBlock *BB, ++ float threshold) { ++ uint64_t modified = 0; ++ if (BB->getExecutionCount() != 0) { ++ return modified; ++ } ++ ++ std::vector input_string; ++ std::vector input_int64; ++ std::vector input_float; ++ ++ BinaryBasicBlockFeature BBF = BB->getFeatures(); ++ input_int64 = BBF.getInferenceFeatures(); ++ ++ if (input_int64.empty()) { ++ return 0; ++ } ++ ++ float model_pred = ++ dataReaderRef->ONNXInference(input_string, input_int64, input_float); ++ if (model_pred >= threshold) { ++ uint64_t min_neighbor_count = std::numeric_limits::max(); ++ for (BinaryBasicBlock *parent : BB->getParentSet()) { ++ if (parent->getExecutionCount() > 0 && ++ parent->getExecutionCount() < min_neighbor_count) ++ min_neighbor_count = parent->getExecutionCount(); ++ } ++ for (BinaryBasicBlock *child : BB->getChildrenSet()) { ++ if (child->getExecutionCount() > 0 && ++ child->getExecutionCount() < min_neighbor_count) ++ min_neighbor_count = child->getExecutionCount(); ++ } ++ if (min_neighbor_count != std::numeric_limits::max()) { ++ BB->setExecutionCount(min_neighbor_count); ++ modified = 1; ++ } ++ } ++ return modified; ++} ++ ++uint64_t estimateBBCount(DataReader *dataReaderRef, BinaryFunction &BF, ++ float threshold) { ++ uint64_t modified_total_func = 0; ++ const auto &Order = BF.dfs(); ++ for (auto *BBA : Order) { ++ auto &BB = *BBA; ++ modified_total_func += estimateBBCount(dataReaderRef, &BB, threshold); ++ } ++ return modified_total_func; ++} ++ + void DataReader::readSampleData(BinaryFunction &BF) { + FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames()); + if (!SampleDataOrErr) +@@ -600,6 +697,17 @@ void DataReader::readSampleData(BinaryFunction &BF) { + + BF.ExecutionCount = TotalEntryCount; + ++ if (opts::BlockCorrection) { ++ generateChildrenParentCount(BF); ++ std::unique_ptr FM = ++ std::make_unique(opts::BlockCorrection); ++ FM->inferenceFeatures(BF); ++ ++ float threshold = DataReader::getThreshold(); ++ uint64_t modified_total_func = estimateBBCount(this, BF, threshold); ++ DataReader::addModifiedBBTotal(modified_total_func); ++ } ++ + estimateEdgeCounts(BF); + } + +diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp +index c6ea0b009..4191e18bd 100644 +--- a/bolt/lib/Rewrite/RewriteInstance.cpp ++++ b/bolt/lib/Rewrite/RewriteInstance.cpp +@@ -106,6 +106,12 @@ cl::opt DumpDotAll( + "enable '-print-loops' for color-coded blocks"), + cl::Hidden, cl::cat(BoltCategory)); + ++cl::opt BlockCorrection( ++ "block-correction", ++ cl::desc("capture features useful for ML model to inference the count on the binary basic block" ++ " and correct them on CFG."), ++ cl::ZeroOrMore, cl::cat(BoltOptCategory)); ++ + static cl::list + ForceFunctionNames("funcs", + cl::CommaSeparated, -- -2.33.0 +2.39.3 (Apple Git-146) diff --git a/README.en.md b/README.en.md index 2672f23b82d0d3b314f057ef6f080dfd45de4a31..485d5a3ffa58191f8cc35faad18bbafd4e66ab0f 100644 --- a/README.en.md +++ b/README.en.md @@ -1,22 +1,9 @@ # llvm-bolt #### Description -llvm-bolt is a post-link optimizer developed to speed up large applications - -#### Software Architecture -Software architecture description - -#### Installation - -1. xxxx -2. xxxx -3. xxxx - -#### Instructions - -1. xxxx -2. xxxx -3. xxxx +BOLT is a post-link optimizer developed to speed up large applications. +It achieves the improvements by optimizing application's code layout based +on execution profile gathered by sampling profiler, such as Linux perf tool. #### Contribution diff --git a/README.md b/README.md index c1a4057b4bc8c638d4c0cf08dca35568615ce43e..1e836ceb9aab2e8b5e57a1f52e41c6f77d7d1bcd 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,9 @@ # llvm-bolt #### 介绍 -llvm-bolt is a post-link optimizer developed to speed up large applications - -#### 软件架构 -软件架构说明 - - -#### 安装教程 - -1. xxxx -2. xxxx -3. xxxx - -#### 使用说明 - -1. xxxx -2. xxxx -3. xxxx +BOLT is a post-link optimizer developed to speed up large applications. +It achieves the improvements by optimizing application's code layout based +on execution profile gathered by sampling profiler, such as Linux perf tool. #### 参与贡献 diff --git a/llvm-bolt.spec b/llvm-bolt.spec index b45c348978351b6c5500dfaf971b926b4321fe42..fd0a8f9ba36ab0e9603f87a5a75940abdf9a1c87 100644 --- a/llvm-bolt.spec +++ b/llvm-bolt.spec @@ -1,32 +1,41 @@ +%bcond_without sys_llvm %bcond_with check -%global maj_ver 15 +%global maj_ver 17 %global min_ver 0 -%global patch_ver 7 +%global patch_ver 6 %global bolt_version %{maj_ver}.%{min_ver}.%{patch_ver} %global bolt_srcdir llvm-project-%{bolt_version}.src +%if %{with sys_llvm} +%global pkg_name llvm-bolt %global install_prefix %{_prefix} +%else +%global pkg_name llvm-bolt%{maj_ver} +%global install_prefix %{_libdir}/llvm%{maj_ver} +%endif %global install_bindir %{install_prefix}/bin %global install_libdir %{install_prefix}/lib %global install_docdir %{install_prefix}/share/doc %global max_link_jobs 2 -Name: llvm-bolt +Name: %{pkg_name} Version: %{bolt_version} -Release: 3 +Release: 7 Summary: BOLT is a post-link optimizer developed to speed up large applications License: Apache 2.0 URL: https://github.com/llvm/llvm-project/tree/main/bolt Source0: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{bolt_version}/%{bolt_srcdir}.tar.xz +Source1: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{bolt_version}/%{bolt_srcdir}.tar.xz.sig -Patch1: 0001-AArch64-fix-bug-55005-handle-DW_CFA_GNU_NegateRAState.patch -Patch2: 0002-AArch64-Add-AArch64-support-for-hugify.patch +Patch1: 0001-Fix-trap-value-for-non-X86.patch +Patch2: 0002-Add-test-for-emitting-trap-value.patch Patch3: 0003-AArch64-Add-AArch64-support-for-inline.patch -Patch4: 0004-Added-open-source-code-related-to-feature-extracting.patch -Patch5: 0005-Add-block-correction-optimization.patch +Patch4: 0004-Bolt-Solving-pie-support-issue.patch +Patch5: 0005-BOLT-AArch64-Don-t-change-layout-in-PatchEntries.patch +Patch6: 0006-AArch64-Add-CFG-block-count-correction-optimization.patch BuildRequires: gcc BuildRequires: gcc-c++ @@ -77,6 +86,9 @@ Documentation for the BOLT optimizer -DLLVM_TARGETS_TO_BUILD="AArch64" %endif +# Set LD_LIBRARY_PATH now because we skip rpath generation and the build uses +# some just built libraries. +export LD_LIBRARY_PATH=%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib} %ninja_build bolt %install @@ -93,9 +105,9 @@ find %{buildroot}%{install_prefix} \ ! -name "libbolt_rt_instr.a" \ -type f,l -exec rm -f '{}' \; -# -rm -f %{_builddir}/%{bolt_srcdir}/lib/lib*.a - +# Remove files installed during the build phase. +rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}/lib*.a + # There currently is not support upstream for building html doc from BOLT install -d %{buildroot}%{install_docdir} mv bolt/README.md bolt/docs/*.md %{buildroot}%{install_docdir} @@ -103,24 +115,17 @@ mv bolt/README.md bolt/docs/*.md %{buildroot}%{install_docdir} %check %if %{with check} -# Bolt makes incorrect assumptions on the location of libbolt_rt_*.a. -mkdir -p %{_builddir}/%{bolt_srcdir}/lib -ln -s %{buildroot}/%{install_libdir}/libbolt_rt_hugify.a %{_builddir}/%{bolt_srcdir}/lib -%ifarch x86_64 -ln -s %{buildroot}/%{install_libdir}/libbolt_rt_instr.a %{_builddir}/%{bolt_srcdir}/lib -%endif - %ifarch aarch64 # Failing test cases on aarch64 rm bolt/test/cache+-deprecated.test bolt/test/bolt-icf.test bolt/test/R_ABS.pic.lld.cpp %endif -export LD_LIBRARY_PATH=%{_builddir}/%{bolt_srcdir}/lib +export LD_LIBRARY_PATH=%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib} export DESTDIR=%{buildroot} -%ninja_build check +%ninja_build check-bolt # Remove files installed during the check phase. -rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/lib/lib*.a +rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}/lib*.a %endif %files @@ -130,9 +135,9 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/lib/lib*.a %{install_bindir}/merge-fdata %{install_bindir}/perf2bolt %{install_bindir}/llvm-bolt-heatmap -%{install_libdir}/libbolt_rt_hugify.a %ifarch x86_64 +%{install_libdir}/libbolt_rt_hugify.a %{install_libdir}/libbolt_rt_instr.a %endif @@ -142,12 +147,36 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/lib/lib*.a %doc %{install_docdir} %changelog -* Wed Jun 5 2024 Zhou Zeping 15.0.7-3 -- Type:Update +* Fri Jul 12 2024 rfwang07 17.0.6-7 +- Type:Feature - ID:NA - SUG:NA - DESC: Add CFG block count correction optimization. +* Fri Jun 21 2024 rfwang07 17.0.6-6 +- Type:Backport +- ID:NA +- SUG:NA +- DESC: Backport bugfix. + +* Tue Jun 18 2024 Xiong Zhou 17.0.6-5 +- Type:Feature +- ID:NA +- SUG:NA +- DESC: Add AArch64 support for inline. + +* Tue Jun 18 2024 Xiong Zhou 17.0.6-4 +- Type:Backport +- ID:NA +- SUG:NA +- DESC: Backport bugfix. + +* Tue Jun 18 2024 Xiong Zhou 17.0.6-3 +- Type:Update +- ID:NA +- SUG:NA +- DESC: Update to version 17.0.6 + * Thu Sep 7 2023 Xiong Zhou 15.0.7-2 - Type:Update - ID:NA diff --git a/llvm-project-15.0.7.src.tar.xz b/llvm-project-17.0.6.src.tar.xz similarity index 79% rename from llvm-project-15.0.7.src.tar.xz rename to llvm-project-17.0.6.src.tar.xz index 7bc21504b4b3827c22e460f5c3ecd78b5b4bf67d..cdcb13184e9bf3d86998f2e0f937d3a6e15da999 100644 Binary files a/llvm-project-15.0.7.src.tar.xz and b/llvm-project-17.0.6.src.tar.xz differ diff --git a/llvm-project-17.0.6.src.tar.xz.sig b/llvm-project-17.0.6.src.tar.xz.sig new file mode 100644 index 0000000000000000000000000000000000000000..59c3713127177606f6f6d95bf61790e3d7ad15cf Binary files /dev/null and b/llvm-project-17.0.6.src.tar.xz.sig differ