diff --git a/0001-AArch64-fix-bug-55005-handle-DW_CFA_GNU_NegateRAState.patch b/0001-AArch64-fix-bug-55005-handle-DW_CFA_GNU_NegateRAState.patch new file mode 100644 index 0000000000000000000000000000000000000000..15398a9058bdbc3a6811dc4649e540ade6789654 --- /dev/null +++ b/0001-AArch64-fix-bug-55005-handle-DW_CFA_GNU_NegateRAState.patch @@ -0,0 +1,135 @@ +From c62ab1487115a74d72ad23fd89b42076d5726bde Mon Sep 17 00:00:00 2001 +From: xiongzhou4 +Date: Mon, 24 Jul 2023 19:47:46 +0800 +Subject: [PATCH] [AArch64] fix bug #55005 handle DW_CFA_GNU_NegateRAState. + backport: https://reviews.llvm.org/rG9921197920fc3e9ad9605bd8fe0e835ca2dd41a5 + +--- + bolt/lib/Core/Exceptions.cpp | 19 ++++-- + .../Inputs/dw_cfa_gnu_window_save.yaml | 62 +++++++++++++++++++ + bolt/test/AArch64/dw_cfa_gnu_window_save.test | 8 +++ + 3 files changed, 83 insertions(+), 6 deletions(-) + create mode 100644 bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml + create mode 100644 bolt/test/AArch64/dw_cfa_gnu_window_save.test + +diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp +index 79404ca87..b0aa8b990 100644 +--- a/bolt/lib/Core/Exceptions.cpp ++++ b/bolt/lib/Core/Exceptions.cpp +@@ -644,18 +644,25 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { + errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n"; + return false; + case DW_CFA_GNU_window_save: ++ // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same ++ // id but mean different things. The latter is used in AArch64. ++ if (Function.getBinaryContext().isAArch64()) { ++ Function.addCFIInstruction( ++ Offset, MCCFIInstruction::createNegateRAState(nullptr)); ++ break; ++ } ++ if (opts::Verbosity >= 1) ++ errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n"; ++ return false; + case DW_CFA_lo_user: + case DW_CFA_hi_user: +- if (opts::Verbosity >= 1) { +- errs() << "BOLT-WARNING: DW_CFA_GNU_* and DW_CFA_*_user " +- "unimplemented\n"; +- } ++ if (opts::Verbosity >= 1) ++ errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n"; + return false; + default: +- if (opts::Verbosity >= 1) { ++ if (opts::Verbosity >= 1) + errs() << "BOLT-WARNING: Unrecognized CFI instruction: " << Instr.Opcode + << '\n'; +- } + return false; + } + +diff --git a/bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml b/bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml +new file mode 100644 +index 000000000..faa32e089 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml +@@ -0,0 +1,62 @@ ++--- !ELF ++FileHeader: ++ Class: ELFCLASS64 ++ Data: ELFDATA2LSB ++ Type: ET_EXEC ++ Machine: EM_AARCH64 ++ Entry: 0x4100C0 ++ProgramHeaders: ++ - Type: PT_LOAD ++ Flags: [ PF_X, PF_R ] ++ FirstSec: .init ++ LastSec: .fini ++ VAddr: 0x410000 ++ Align: 0x10000 ++Sections: ++ - Name: .init ++ Type: SHT_PROGBITS ++ Flags: [ SHF_ALLOC, SHF_EXECINSTR ] ++ Address: 0x410000 ++ AddressAlign: 0x4 ++ Offset: 0x10000 ++ Content: 3F2303D5FD7BBFA9FD0300913F000094FD7BC1A8BF2303D5C0035FD6 ++ - Name: .plt ++ Type: SHT_PROGBITS ++ Flags: [ SHF_ALLOC, SHF_EXECINSTR ] ++ Address: 0x410020 ++ AddressAlign: 0x10 ++ Content: F07BBFA9700100F011FE47F910E23F9120021FD61F2003D51F2003D51F2003D590010090110240F91002009120021FD690010090110640F91022009120021FD690010090110A40F91042009120021FD6 ++ - Name: .text ++ Type: SHT_PROGBITS ++ Flags: [ SHF_ALLOC, SHF_EXECINSTR ] ++ Address: 0x410080 ++ AddressAlign: 0x40 ++ Content: 00008052C0035FD61F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D55F2403D51D0080D21E0080D2E50300AAE10340F9E2230091E60300910000009000D00391030080D2040080D2D5FFFF97D8FFFF975F2403D5E2FFFF171F2003D55F2403D5C0035FD6600100F000F047F9400000B4D3FFFF17C0035FD61F2003D5800100908101009000800091218000913F0000EBC000005481000090210840F9610000B4F00301AA00021FD6C0035FD680010090810100900080009121800091210000CB22FC7FD3410C818B21FC4193C10000B482000090420C40F9620000B4F00302AA00021FD6C0035FD63F2303D5FD7BBEA9FD030091F30B00F9930100906082403980000035DEFFFF972000805260820039F30B40F9FD7BC2A8BF2303D5C0035FD65F2403D5E2FFFF17 ++ - Name: .fini ++ Type: SHT_PROGBITS ++ Flags: [ SHF_ALLOC, SHF_EXECINSTR ] ++ Address: 0x4101CC ++ AddressAlign: 0x4 ++ Content: 3F2303D5FD7BBFA9FD030091FD7BC1A8BF2303D5C0035FD6 ++ - Name: .eh_frame ++ Type: SHT_PROGBITS ++ Flags: [ SHF_ALLOC ] ++ Address: 0x420068 ++ AddressAlign: 0x8 ++ Content: 1000000000000000017A520004781E011B0C1F0010000000180000003C00FFFF3C0000000041071E140000002C0000006800FFFF08000000000000000000000010000000440000007000FFFF300000000000000010000000580000008C00FFFF3C00000000000000240000006C000000B400FFFF3800000000412D410E209D049E0342930248DEDDD30E00412D0000001400000094000000C400FFFF08000000000000000000000010000000AC00000068FFFEFF080000000000000000000000 ++ - Name: .rela.text ++ Type: SHT_RELA ++ Flags: [ SHF_INFO_LINK ] ++ Link: .symtab ++ AddressAlign: 0x8 ++ Info: .text ++Symbols: ++ - Name: .text ++ Type: STT_SECTION ++ Section: .text ++ Value: 0x410080 ++ - Name: _ITM_deregisterTMCloneTable ++ Binding: STB_WEAK ++ - Name: _ITM_registerTMCloneTable ++ Binding: STB_WEAK ++... +diff --git a/bolt/test/AArch64/dw_cfa_gnu_window_save.test b/bolt/test/AArch64/dw_cfa_gnu_window_save.test +new file mode 100644 +index 000000000..2e044b399 +--- /dev/null ++++ b/bolt/test/AArch64/dw_cfa_gnu_window_save.test +@@ -0,0 +1,8 @@ ++# Check that llvm-bolt can handle DW_CFA_GNU_window_save on AArch64. ++ ++RUN: yaml2obj %p/Inputs/dw_cfa_gnu_window_save.yaml &> %t.exe ++RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s ++ ++CHECK-NOT: paciasp ++CHECK-NOT: autiasp ++CHECK-NOT: ERROR: unable to fill CFI. +-- +2.33.0 + diff --git a/0002-AArch64-Add-AArch64-support-for-hugify.patch b/0002-AArch64-Add-AArch64-support-for-hugify.patch new file mode 100644 index 0000000000000000000000000000000000000000..b4adef69537cb26bc3d88f7b7ef29667b0b4a52a --- /dev/null +++ b/0002-AArch64-Add-AArch64-support-for-hugify.patch @@ -0,0 +1,465 @@ +From 81a80dbe9f47f728bc593d05cd5708a653a23f1c Mon Sep 17 00:00:00 2001 +From: xiongzhou4 +Date: Mon, 11 Sep 2023 11:33:41 +0800 +Subject: [PATCH] [AArch64] Add AArch64 support for hugify. + +--- + bolt/CMakeLists.txt | 4 +- + bolt/runtime/CMakeLists.txt | 28 ++- + bolt/runtime/common.h | 224 ++++++++++++++++++ + bolt/runtime/hugify.cpp | 21 +- + .../AArch64/Inputs/user_func_order.txt | 2 + + bolt/test/runtime/AArch64/user-func-reorder.c | 44 ++++ + 6 files changed, 305 insertions(+), 18 deletions(-) + create mode 100644 bolt/test/runtime/AArch64/Inputs/user_func_order.txt + create mode 100644 bolt/test/runtime/AArch64/user-func-reorder.c + +diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt +index a97878cd3..3de930496 100644 +--- a/bolt/CMakeLists.txt ++++ b/bolt/CMakeLists.txt +@@ -5,7 +5,7 @@ set(BOLT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) + set(CMAKE_CXX_STANDARD 14) + + set(BOLT_ENABLE_RUNTIME OFF) +-if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") ++if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|aarch64") + set(BOLT_ENABLE_RUNTIME ON) + endif() + +@@ -45,7 +45,7 @@ if (LLVM_INCLUDE_TESTS) + endif() + + if (BOLT_ENABLE_RUNTIME) +- message(STATUS "Building BOLT runtime libraries for X86") ++ message(STATUS "Building BOLT runtime libraries") + ExternalProject_Add(bolt_rt + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/runtime" + STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-stamps +diff --git a/bolt/runtime/CMakeLists.txt b/bolt/runtime/CMakeLists.txt +index 7c1b79af4..ee6ab7bd4 100644 +--- a/bolt/runtime/CMakeLists.txt ++++ b/bolt/runtime/CMakeLists.txt +@@ -10,10 +10,12 @@ check_include_files(elf.h HAVE_ELF_H) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in + ${CMAKE_CURRENT_BINARY_DIR}/config.h) + +-add_library(bolt_rt_instr STATIC +- instr.cpp +- ${CMAKE_CURRENT_BINARY_DIR}/config.h +- ) ++if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") ++ add_library(bolt_rt_instr STATIC ++ instr.cpp ++ ${CMAKE_CURRENT_BINARY_DIR}/config.h ++ ) ++endif() + add_library(bolt_rt_hugify STATIC + hugify.cpp + ${CMAKE_CURRENT_BINARY_DIR}/config.h +@@ -23,16 +25,24 @@ set(BOLT_RT_FLAGS + -ffreestanding + -fno-exceptions + -fno-rtti +- -fno-stack-protector +- -mno-sse) ++ -fno-stack-protector) ++ ++# x86 exclusive option ++if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") ++ list(APPEND BOLT_RT_FLAGS -mno-sse) ++endif() + + # Don't let the compiler think it can create calls to standard libs +-target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS} -fPIE) +-target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) ++if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") ++ target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS} -fPIE) ++ target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) ++endif() + target_compile_options(bolt_rt_hugify PRIVATE ${BOLT_RT_FLAGS}) + target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) + +-install(TARGETS bolt_rt_instr DESTINATION lib) ++if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") ++ install(TARGETS bolt_rt_instr DESTINATION lib) ++endif() + install(TARGETS bolt_rt_hugify DESTINATION lib) + + if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*") +diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h +index 008dbb6c3..6869742e7 100644 +--- a/bolt/runtime/common.h ++++ b/bolt/runtime/common.h +@@ -39,6 +39,45 @@ typedef int int32_t; + #endif + + // Save all registers while keeping 16B stack alignment ++#if defined (__aarch64__) ++#define SAVE_ALL \ ++ "stp x0, x1, [sp, #-16]!\n" \ ++ "stp x2, x3, [sp, #-16]!\n" \ ++ "stp x4, x5, [sp, #-16]!\n" \ ++ "stp x6, x7, [sp, #-16]!\n" \ ++ "stp x8, x9, [sp, #-16]!\n" \ ++ "stp x10, x11, [sp, #-16]!\n" \ ++ "stp x12, x13, [sp, #-16]!\n" \ ++ "stp x14, x15, [sp, #-16]!\n" \ ++ "stp x16, x17, [sp, #-16]!\n" \ ++ "stp x18, x19, [sp, #-16]!\n" \ ++ "stp x20, x21, [sp, #-16]!\n" \ ++ "stp x22, x23, [sp, #-16]!\n" \ ++ "stp x24, x25, [sp, #-16]!\n" \ ++ "stp x26, x27, [sp, #-16]!\n" \ ++ "stp x28, x29, [sp, #-16]!\n" \ ++ "stp x30, xzr, [sp, #-16]!\n" ++ ++// Mirrors SAVE_ALL ++#define RESTORE_ALL \ ++ "ldp x30, xzr, [sp], #16\n" \ ++ "ldp x28, x29, [sp], #16\n" \ ++ "ldp x26, x27, [sp], #16\n" \ ++ "ldp x24, x25, [sp], #16\n" \ ++ "ldp x22, x23, [sp], #16\n" \ ++ "ldp x20, x21, [sp], #16\n" \ ++ "ldp x18, x19, [sp], #16\n" \ ++ "ldp x16, x17, [sp], #16\n" \ ++ "ldp x14, x15, [sp], #16\n" \ ++ "ldp x12, x13, [sp], #16\n" \ ++ "ldp x10, x11, [sp], #16\n" \ ++ "ldp x8, x9, [sp], #16\n" \ ++ "ldp x6, x7, [sp], #16\n" \ ++ "ldp x4, x5, [sp], #16\n" \ ++ "ldp x2, x3, [sp], #16\n" \ ++ "ldp x0, x1, [sp], #16\n" ++ ++#else + #define SAVE_ALL \ + "push %%rax\n" \ + "push %%rbx\n" \ +@@ -75,6 +114,7 @@ typedef int int32_t; + "pop %%rcx\n" \ + "pop %%rbx\n" \ + "pop %%rax\n" ++#endif + + // Functions that are required by freestanding environment. Compiler may + // generate calls to these implicitly. +@@ -129,6 +169,189 @@ constexpr uint32_t BufSize = 10240; + #define _STRINGIFY(x) #x + #define STRINGIFY(x) _STRINGIFY(x) + ++#if defined (__aarch64__) ++// Declare some syscall wrappers we use throughout this code to avoid linking ++// against system libc. ++uint64_t __read(uint64_t fd, const void *buf, uint64_t count) { ++ uint64_t ret; ++ register uint64_t x0 __asm__("x0") = fd; ++ register const void *x1 __asm__("x1") = buf; ++ register uint64_t x2 __asm__("x2") = count; ++ register uint32_t w8 __asm__("w8") = 63; ++ __asm__ __volatile__("svc #0\n" ++ "mov %0, x0" ++ : "=r"(ret), "+r"(x0), "+r"(x1) ++ : "r"(x2), "r"(w8) ++ : "cc", "memory"); ++ return ret; ++} ++ ++uint64_t __write(uint64_t fd, const void *buf, uint64_t count) { ++ uint64_t ret; ++ register uint64_t x0 __asm__("x0") = fd; ++ register const void *x1 __asm__("x1") = buf; ++ register uint64_t x2 __asm__("x2") = count; ++ register uint32_t w8 __asm__("w8") = 64; ++ __asm__ __volatile__("svc #0\n" ++ "mov %0, x0" ++ : "=r"(ret), "+r"(x0), "+r"(x1) ++ : "r"(x2), "r"(w8) ++ : "cc", "memory"); ++ return ret; ++} ++ ++void *__mmap(uint64_t addr, uint64_t size, uint64_t prot, uint64_t flags, ++ uint64_t fd, uint64_t offset) { ++ void *ret; ++ register uint64_t x0 __asm__("x0") = addr; ++ register uint64_t x1 __asm__("x1") = size; ++ register uint64_t x2 __asm__("x2") = prot; ++ register uint64_t x3 __asm__("x3") = flags; ++ register uint64_t x4 __asm__("x4") = fd; ++ register uint64_t x5 __asm__("x5") = offset; ++ register uint32_t w8 __asm__("w8") = 222; ++ __asm__ __volatile__("svc #0\n" ++ "mov %0, x0" ++ : "=r"(ret), "+r"(x0), "+r"(x1) ++ : "r"(x2), "r"(x3), "r"(x4), "r"(x5), "r"(w8) ++ : "cc", "memory"); ++ return ret; ++} ++ ++uint64_t __munmap(void *addr, uint64_t size) { ++ uint64_t ret; ++ register void *x0 __asm__("x0") = addr; ++ register uint64_t x1 __asm__("x1") = size; ++ register uint32_t w8 __asm__("w8") = 215; ++ __asm__ __volatile__("svc #0\n" ++ "mov %0, x0" ++ : "=r"(ret), "+r"(x0), "+r"(x1) ++ : "r"(w8) ++ : "cc", "memory"); ++ return ret; ++} ++ ++uint64_t __exit(uint64_t code) { ++ uint64_t ret; ++ register uint64_t x0 __asm__("x0") = code; ++ register uint32_t w8 __asm__("w8") = 94; ++ __asm__ __volatile__("svc #0\n" ++ "mov %0, x0" ++ : "=r"(ret), "+r"(x0) ++ : "r"(w8) ++ : "cc", "memory", "x1"); ++ return ret; ++} ++ ++uint64_t __open(const char *pathname, uint64_t flags, uint64_t mode) { ++ uint64_t ret; ++ register int x0 __asm__("x0") = -100; ++ register const char *x1 __asm__("x1") = pathname; ++ register uint64_t x2 __asm__("x2") = flags; ++ register uint64_t x3 __asm__("x3") = mode; ++ register uint32_t w8 __asm__("w8") = 56; ++ __asm__ __volatile__("svc #0\n" ++ "mov %0, x0" ++ : "=r"(ret), "+r"(x0), "+r"(x1) ++ : "r"(x2), "r"(x3), "r"(w8) ++ : "cc", "memory"); ++ return ret; ++} ++ ++int __madvise(void *addr, size_t length, int advice) { ++ int ret; ++ register void *x0 __asm__("x0") = addr; ++ register size_t x1 __asm__("x1") = length; ++ register int x2 __asm__("x2") = advice; ++ register uint32_t w8 __asm__("w8") = 233; ++ __asm__ __volatile__("svc #0\n" ++ "mov %w0, w0" ++ : "=r"(ret), "+r"(x0), "+r"(x1) ++ : "r"(x2), "r"(w8) ++ : "cc", "memory"); ++ return ret; ++} ++ ++int __mprotect(void *addr, size_t len, int prot) { ++ int ret; ++ register void *x0 __asm__("x0") = addr; ++ register size_t x1 __asm__("x1") = len; ++ register int x2 __asm__("x2") = prot; ++ register uint32_t w8 __asm__("w8") = 226; ++ __asm__ __volatile__("svc #0\n" ++ "mov %w0, w0" ++ : "=r"(ret), "+r"(x0), "+r"(x1) ++ : "r"(x2), "r"(w8) ++ : "cc", "memory"); ++ return ret; ++} ++ ++// Helper functions for writing strings to the .fdata file. We intentionally ++// avoid using libc names to make it clear it is our impl. ++ ++/// Compare two strings, at most Num bytes. ++int strnCmp(const char *Str1, const char *Str2, size_t Num) { ++ while (Num && *Str1 && (*Str1 == *Str2)) { ++ Num--; ++ Str1++; ++ Str2++; ++ } ++ if (Num == 0) ++ return 0; ++ return *(unsigned char *)Str1 - *(unsigned char *)Str2; ++} ++ ++uint32_t strLen(const char *Str) { ++ uint32_t Size = 0; ++ while (*Str++) ++ ++Size; ++ return Size; ++} ++ ++/// Write number Num using Base to the buffer in OutBuf, returns a pointer to ++/// the end of the string. ++char *intToStr(char *OutBuf, uint64_t Num, uint32_t Base) { ++ const char *Chars = "0123456789abcdef"; ++ char Buf[21]; ++ char *Ptr = Buf; ++ while (Num) { ++ *Ptr++ = *(Chars + (Num % Base)); ++ Num /= Base; ++ } ++ if (Ptr == Buf) { ++ *OutBuf++ = '0'; ++ return OutBuf; ++ } ++ while (Ptr != Buf) ++ *OutBuf++ = *--Ptr; ++ ++ return OutBuf; ++} ++ ++/// Copy Str to OutBuf, returns a pointer to the end of the copied string ++char *strCopy(char *OutBuf, const char *Str, int32_t Size = BufSize) { ++ while (*Str) { ++ *OutBuf++ = *Str++; ++ if (--Size <= 0) ++ return OutBuf; ++ } ++ return OutBuf; ++} ++ ++void reportNumber(const char *Msg, uint64_t Num, uint32_t Base) { ++ char Buf[BufSize]; ++ char *Ptr = Buf; ++ Ptr = strCopy(Ptr, Msg, BufSize - 23); ++ Ptr = intToStr(Ptr, Num, Base); ++ Ptr = strCopy(Ptr, "\n"); ++ __write(2, Buf, Ptr - Buf); ++} ++ ++void reportError(const char *Msg, uint64_t Size) { ++ __write(2, Msg, Size); ++ __exit(1); ++} ++#else + uint64_t __read(uint64_t fd, const void *buf, uint64_t count) { + uint64_t ret; + #if defined(__APPLE__) +@@ -550,5 +773,6 @@ public: + inline uint64_t alignTo(uint64_t Value, uint64_t Align) { + return (Value + Align - 1) / Align * Align; + } ++#endif + + } // anonymous namespace +diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp +index 69e1a7e06..385e4d147 100644 +--- a/bolt/runtime/hugify.cpp ++++ b/bolt/runtime/hugify.cpp +@@ -6,26 +6,25 @@ + // + //===----------------------------------------------------------------------===// + +-#if defined (__x86_64__) + #if !defined(__APPLE__) + + #include "common.h" + #include + + // Enables a very verbose logging to stderr useful when debugging +-//#define ENABLE_DEBUG ++// #define ENABLE_DEBUG + + // Function pointers to init routines in the binary, so we can resume + // regular execution of the function that we hooked. + extern void (*__bolt_hugify_init_ptr)(); + + // The __hot_start and __hot_end symbols set by Bolt. We use them to figure +-// out the rage for marking huge pages. ++// out the range for marking huge pages. + extern uint64_t __hot_start; + extern uint64_t __hot_end; + + #ifdef MADV_HUGEPAGE +-/// Check whether the kernel supports THP via corresponding sysfs entry. ++// Check whether the kernel supports THP via corresponding sysfs entry. + static bool has_pagecache_thp_support() { + char buf[256] = {0}; + const char *madviseStr = "always [madvise] never"; +@@ -116,14 +115,22 @@ extern "C" void __bolt_hugify_self_impl() { + #endif + } + +-/// This is hooking ELF's entry, it needs to save all machine state. ++// This is hooking ELF's entry, it needs to save all machine state. + extern "C" __attribute((naked)) void __bolt_hugify_self() { ++#if defined (__x86_64__) + __asm__ __volatile__(SAVE_ALL + "call __bolt_hugify_self_impl\n" + RESTORE_ALL + "jmp *__bolt_hugify_init_ptr(%%rip)\n" + :::); +-} +- ++#elif defined (__aarch64__) ++ __asm__ __volatile__(SAVE_ALL ++ "bl __bolt_hugify_self_impl\n" ++ RESTORE_ALL ++ "ldr x16, =__bolt_hugify_init_ptr\n" ++ "ldr x16, [x16]\n" ++ "br x16\n" ++ :::); + #endif ++} + #endif +diff --git a/bolt/test/runtime/AArch64/Inputs/user_func_order.txt b/bolt/test/runtime/AArch64/Inputs/user_func_order.txt +new file mode 100644 +index 000000000..48b76cd35 +--- /dev/null ++++ b/bolt/test/runtime/AArch64/Inputs/user_func_order.txt +@@ -0,0 +1,2 @@ ++main ++fib +diff --git a/bolt/test/runtime/AArch64/user-func-reorder.c b/bolt/test/runtime/AArch64/user-func-reorder.c +new file mode 100644 +index 000000000..fcb92bca1 +--- /dev/null ++++ b/bolt/test/runtime/AArch64/user-func-reorder.c +@@ -0,0 +1,44 @@ ++/* Checks that BOLT correctly processes a user-provided function list file, ++ * reorder functions according to this list, update hot_start and hot_end ++ * symbols and insert a function to perform hot text mapping during program ++ * startup. ++ */ ++#include ++ ++int foo(int x) { ++ return x + 1; ++} ++ ++int fib(int x) { ++ if (x < 2) ++ return x; ++ return fib(x - 1) + fib(x - 2); ++} ++ ++int bar(int x) { ++ return x - 1; ++} ++ ++int main(int argc, char **argv) { ++ printf("fib(%d) = %d\n", argc, fib(argc)); ++ return 0; ++} ++ ++/* ++REQUIRES: system-linux,bolt-runtime ++ ++RUN: %clang %cflags -no-pie %s -o %t.exe -Wl,-q ++ ++RUN: llvm-bolt %t.exe --relocs=1 --lite --reorder-functions=user \ ++RUN: --hugify --function-order=%p/Inputs/user_func_order.txt -o %t ++RUN: llvm-nm --numeric-sort --print-armap %t | \ ++RUN: FileCheck %s -check-prefix=CHECK-NM ++RUN: %t 1 2 3 | FileCheck %s -check-prefix=CHECK-OUTPUT ++ ++CHECK-NM: W __hot_start ++CHECK-NM: T main ++CHECK-NM-NEXT: T fib ++CHECK-NM-NEXT: W __hot_end ++ ++CHECK-OUTPUT: fib(4) = 3 ++*/ +-- +2.33.0 + diff --git a/0003-AArch64-Add-AArch64-support-for-inline.patch b/0003-AArch64-Add-AArch64-support-for-inline.patch new file mode 100644 index 0000000000000000000000000000000000000000..2ce33e98d6389ccf22eeb5fb0fcf27d89e1f8efd --- /dev/null +++ b/0003-AArch64-Add-AArch64-support-for-inline.patch @@ -0,0 +1,259 @@ +From b26ff1c328435d7b0ceccec1dcc25252821ad373 Mon Sep 17 00:00:00 2001 +From: xiongzhou4 +Date: Mon, 11 Sep 2023 14:43:12 +0800 +Subject: [PATCH] [AArch64] Add AArch64 support for inline. + +--- + bolt/lib/Passes/Inliner.cpp | 31 +++++++++++++++++++ + .../Target/AArch64/AArch64MCPlusBuilder.cpp | 12 +++++++ + bolt/test/AArch64/Inputs/inline-foo.c | 5 +++ + bolt/test/AArch64/Inputs/inline-main.c | 5 +++ + bolt/test/AArch64/Inputs/inlined.cpp | 23 ++++++++++++++ + bolt/test/AArch64/Inputs/inlinee.cpp | 3 ++ + bolt/test/AArch64/Inputs/jmp_opt.cpp | 7 +++++ + bolt/test/AArch64/Inputs/jmp_opt2.cpp | 3 ++ + bolt/test/AArch64/Inputs/jmp_opt3.cpp | 3 ++ + bolt/test/AArch64/inline-debug-info.test | 20 ++++++++++++ + bolt/test/AArch64/inlined-function-mixed.test | 11 +++++++ + bolt/test/AArch64/jmp-optimization.test | 14 +++++++++ + 12 files changed, 137 insertions(+) + create mode 100644 bolt/test/AArch64/Inputs/inline-foo.c + create mode 100644 bolt/test/AArch64/Inputs/inline-main.c + create mode 100644 bolt/test/AArch64/Inputs/inlined.cpp + create mode 100644 bolt/test/AArch64/Inputs/inlinee.cpp + create mode 100644 bolt/test/AArch64/Inputs/jmp_opt.cpp + create mode 100644 bolt/test/AArch64/Inputs/jmp_opt2.cpp + create mode 100644 bolt/test/AArch64/Inputs/jmp_opt3.cpp + create mode 100644 bolt/test/AArch64/inline-debug-info.test + create mode 100644 bolt/test/AArch64/inlined-function-mixed.test + create mode 100644 bolt/test/AArch64/jmp-optimization.test + +diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp +index 04232bd3e..d009d59dc 100644 +--- a/bolt/lib/Passes/Inliner.cpp ++++ b/bolt/lib/Passes/Inliner.cpp +@@ -464,6 +464,37 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) { + << ". Size change: " << SizeAfterInlining + << " bytes.\n"); + ++// Skip situations where some A64 instructions can't be inlined: ++// # Indirect branch, e.g., BR. ++// # Branch instructions but used to make a function call. ++ if (BC.isAArch64()) { ++ auto &MIB = *BC.MIB; ++ bool skip = false; ++ for (const BinaryBasicBlock &BB : *TargetFunction) { ++ for (MCInst Inst : BB) { ++ if (MIB.isPseudo(Inst)) ++ continue; ++ ++ MIB.stripAnnotations(Inst, false); ++ ++ if (MIB.isBranch(Inst)) { ++ const BinaryBasicBlock *TargetBB = ++ TargetFunction->getBasicBlockForLabel(MIB.getTargetSymbol(Inst)); ++ if (MIB.isIndirectBranch(Inst) || !TargetBB) { ++ skip = true; ++ break; ++ } ++ } ++ } ++ if (skip) ++ break; ++ } ++ if (skip) { ++ ++InstIt; ++ continue; ++ } ++ } ++ + std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction); + + DidInlining = true; +diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +index c736196a8..03b1b536f 100644 +--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp ++++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +@@ -34,6 +34,16 @@ public: + const MCRegisterInfo *RegInfo) + : MCPlusBuilder(Analysis, Info, RegInfo) {} + ++ MCPhysReg getStackPointer() const override { return AArch64::SP; } ++ ++ bool createCall(MCInst &Inst, const MCSymbol *Target, ++ MCContext *Ctx) override { ++ Inst.setOpcode(AArch64::BL); ++ Inst.addOperand(MCOperand::createExpr( ++ MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx))); ++ return true; ++ } ++ + bool equals(const MCTargetExpr &A, const MCTargetExpr &B, + CompFuncTy Comp) const override { + const auto &AArch64ExprA = cast(A); +@@ -1103,6 +1113,8 @@ public: + + bool isLeave(const MCInst &Inst) const override { return false; } + ++ bool isPush(const MCInst &Inst) const override { return false; } ++ + bool isPop(const MCInst &Inst) const override { return false; } + + bool isPrefix(const MCInst &Inst) const override { return false; } +diff --git a/bolt/test/AArch64/Inputs/inline-foo.c b/bolt/test/AArch64/Inputs/inline-foo.c +new file mode 100644 +index 000000000..1307c13f2 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/inline-foo.c +@@ -0,0 +1,5 @@ ++#include "stub.h" ++ ++void foo() { ++ puts("Hello world!\n"); ++} +diff --git a/bolt/test/AArch64/Inputs/inline-main.c b/bolt/test/AArch64/Inputs/inline-main.c +new file mode 100644 +index 000000000..7853d2b63 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/inline-main.c +@@ -0,0 +1,5 @@ ++extern void foo(); ++int main() { ++ foo(); ++ return 0; ++} +diff --git a/bolt/test/AArch64/Inputs/inlined.cpp b/bolt/test/AArch64/Inputs/inlined.cpp +new file mode 100644 +index 000000000..a6ff9e262 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/inlined.cpp +@@ -0,0 +1,23 @@ ++extern "C" int printf(const char*, ...); ++extern const char* question(); ++ ++inline int answer() __attribute__((always_inline)); ++inline int answer() { return 42; } ++ ++int main(int argc, char *argv[]) { ++ int ans; ++ if (argc == 1) { ++ ans = 0; ++ } else { ++ ans = argc; ++ } ++ printf("%s\n", question()); ++ for (int i = 0; i < 10; ++i) { ++ int x = answer(); ++ int y = answer(); ++ ans += x - y; ++ } ++ // padding to make sure question() is inlineable ++ asm("nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;"); ++ return ans; ++} +diff --git a/bolt/test/AArch64/Inputs/inlinee.cpp b/bolt/test/AArch64/Inputs/inlinee.cpp +new file mode 100644 +index 000000000..edb7ab145 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/inlinee.cpp +@@ -0,0 +1,3 @@ ++const char* question() { ++ return "What do you get if you multiply six by nine?"; ++} +diff --git a/bolt/test/AArch64/Inputs/jmp_opt.cpp b/bolt/test/AArch64/Inputs/jmp_opt.cpp +new file mode 100644 +index 000000000..cd6d53c35 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/jmp_opt.cpp +@@ -0,0 +1,7 @@ ++int g(); ++ ++int main() { ++ int x = g(); ++ int y = x*x; ++ return y; ++} +diff --git a/bolt/test/AArch64/Inputs/jmp_opt2.cpp b/bolt/test/AArch64/Inputs/jmp_opt2.cpp +new file mode 100644 +index 000000000..80b853d63 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/jmp_opt2.cpp +@@ -0,0 +1,3 @@ ++int f() { ++ return 0; ++} +diff --git a/bolt/test/AArch64/Inputs/jmp_opt3.cpp b/bolt/test/AArch64/Inputs/jmp_opt3.cpp +new file mode 100644 +index 000000000..7fb551163 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/jmp_opt3.cpp +@@ -0,0 +1,3 @@ ++int f(); ++ ++int g() { return f(); } +diff --git a/bolt/test/AArch64/inline-debug-info.test b/bolt/test/AArch64/inline-debug-info.test +new file mode 100644 +index 000000000..e20e5e31e +--- /dev/null ++++ b/bolt/test/AArch64/inline-debug-info.test +@@ -0,0 +1,20 @@ ++## Check that BOLT correctly prints and updates debug info for inlined ++## functions. ++ ++# REQUIRES: system-linux ++ ++# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \ ++# RUN: -I%p/../Inputs -o %t.exe -Wl,-q ++# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \ ++# RUN: --print-only=main --print-after-lowering --force-inline=foo \ ++# RUN: -o %t.bolt \ ++# RUN: | FileCheck %s ++ ++## The call to puts() should come from inline-foo.c: ++# CHECK: callq {{.*}} # debug line {{.*}}inline-foo.c:4:3 ++ ++# RUN: llvm-objdump --disassemble-symbols=main -d --line-numbers %t.bolt \ ++# RUN: | FileCheck %s -check-prefix=CHECK-OBJDUMP ++ ++## Dump of main() should include debug info from inline-foo.c after inlining: ++# CHECK-OBJDUMP: inline-foo.c:4 +diff --git a/bolt/test/AArch64/inlined-function-mixed.test b/bolt/test/AArch64/inlined-function-mixed.test +new file mode 100644 +index 000000000..5a87bdde9 +--- /dev/null ++++ b/bolt/test/AArch64/inlined-function-mixed.test +@@ -0,0 +1,11 @@ ++# Make sure inlining from a unit with debug info into unit without ++# debug info does not cause a crash. ++ ++RUN: %clangxx %cxxflags %S/Inputs/inlined.cpp -c -o %T/inlined.o ++RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g ++RUN: %clangxx %cxxflags %T/inlined.o %T/inlinee.o -o %t ++ ++RUN: llvm-bolt %t -o %t.bolt --update-debug-sections --reorder-blocks=reverse \ ++RUN: --inline-small-functions --force-inline=main | FileCheck %s ++ ++CHECK-NOT: BOLT: 0 out of {{.*}} functions were overwritten +diff --git a/bolt/test/AArch64/jmp-optimization.test b/bolt/test/AArch64/jmp-optimization.test +new file mode 100644 +index 000000000..92f4b9a14 +--- /dev/null ++++ b/bolt/test/AArch64/jmp-optimization.test +@@ -0,0 +1,14 @@ ++# Tests the optimization of functions that just do a tail call in the beginning. ++ ++# This test has commands that rely on shell capabilities that won't execute ++# correctly on Windows e.g. unsupported parameter expansion ++REQUIRES: shell ++ ++RUN: %clang %cflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t ++RUN: llvm-bolt -inline-small-functions %t -o %t.bolt ++RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s ++ ++CHECK:
: ++CHECK-NOT: call ++CHECK: xorl %eax, %eax ++CHECK: retq +-- +2.33.0 + diff --git a/llvm-bolt.spec b/llvm-bolt.spec index 61b3f6195b00182f06b93eabefcbaf49295530c7..4107703fbb580af24cb04f0036dcbc8197294b8d 100644 --- a/llvm-bolt.spec +++ b/llvm-bolt.spec @@ -15,13 +15,17 @@ Name: llvm-bolt Version: %{bolt_version} -Release: 1 +Release: 2 Summary: BOLT is a post-link optimizer developed to speed up large applications License: Apache 2.0 URL: https://github.com/llvm/llvm-project/tree/main/bolt Source0: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{bolt_version}/%{bolt_srcdir}.tar.xz +Patch1: 0001-AArch64-fix-bug-55005-handle-DW_CFA_GNU_NegateRAState.patch +Patch2: 0002-AArch64-Add-AArch64-support-for-hugify.patch +Patch3: 0003-AArch64-Add-AArch64-support-for-inline.patch + BuildRequires: gcc BuildRequires: gcc-c++ BuildRequires: cmake @@ -97,13 +101,11 @@ mv bolt/README.md bolt/docs/*.md %{buildroot}%{install_docdir} %check %if %{with check} -%ifarch x86_64 # Bolt makes incorrect assumptions on the location of libbolt_rt_*.a. mkdir -p %{_builddir}/%{bolt_srcdir}/lib -for rt in libbolt_rt_instr libbolt_rt_hugify -do - ln -s %{buildroot}/%{install_libdir}/${rt}.a %{_builddir}/%{bolt_srcdir}/lib -done +ln -s %{buildroot}/%{install_libdir}/libbolt_rt_hugify.a %{_builddir}/%{bolt_srcdir}/lib +%ifarch x86_64 +ln -s %{buildroot}/%{install_libdir}/libbolt_rt_instr.a %{_builddir}/%{bolt_srcdir}/lib %endif %ifarch aarch64 @@ -126,9 +128,9 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/lib/lib*.a %{install_bindir}/merge-fdata %{install_bindir}/perf2bolt %{install_bindir}/llvm-bolt-heatmap +%{install_libdir}/libbolt_rt_hugify.a %ifarch x86_64 -%{install_libdir}/libbolt_rt_hugify.a %{install_libdir}/libbolt_rt_instr.a %endif @@ -137,8 +139,13 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/lib/lib*.a %files doc %doc %{install_docdir} - %changelog +* Thu Sep 7 2023 Xiong Zhou 15.0.7-2 +- Type:Update +- ID:NA +- SUG:NA +- DESC: Backport bugfix and add AArch64 support for hugify and inline. + * Thu Aug 31 2023 zhenyu zhao 15.0.7-1 - Type:Update - ID:NA @@ -161,5 +168,4 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/lib/lib*.a - Type:Init - ID:NA - SUG:NA -- DESC:Init llvm-bolt repository - +- DESC:Init llvm-bolt repository