diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index d19e6a99cdfd03f82ba932f55fddd5a57ee832a4..d6064ecabcee84341dc0d7ed4b68a5a93bef26ac 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -43,6 +43,7 @@ #include "llvm/Passes/PassBuilder.h" #include "llvm/Passes/PassPlugin.h" #include "llvm/Passes/StandardInstrumentations.h" +#include "llvm/ProfileData/InstrProfCorrelator.h" #include "llvm/Support/BuryPointer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" @@ -103,13 +104,22 @@ extern cl::opt DebugInfoCorrelate; static cl::opt ClSanitizeOnOptimizerEarlyEP( "sanitizer-early-opt-ep", cl::Optional, cl::desc("Insert sanitizers on OptimizerEarlyEP."), cl::init(false)); -} + +extern cl::opt ProfileCorrelate; + +// Re-link builtin bitcodes after optimization +cl::opt ClRelinkBuiltinBitcodePostop( + "relink-builtin-bitcode-postop", cl::Optional, + cl::desc("Re-link builtin bitcodes after optimization."), cl::init(false)); +} // namespace llvm namespace { // Default filename used for profile generation. std::string getDefaultProfileGenName() { - return DebugInfoCorrelate ? "default_%p.proflite" : "default_%m.profraw"; + return DebugInfoCorrelate || ProfileCorrelate != InstrProfCorrelator::NONE + ? 
"default_%m.proflite" + : "default_%m.profraw"; } class EmitAssemblyHelper { @@ -202,7 +212,7 @@ public: void EmitAssembly(BackendAction Action, std::unique_ptr OS); }; -} +} // namespace static SanitizerCoverageOptions getSancovOptsFromCGOpts(const CodeGenOptions &CGOpts) { diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 3b8e4d7e133a3d9978755b9db8a7f0914cfbb3f0..ffc8a5eea184fff1896cf572d3fa59759cab0bdd 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -1405,7 +1405,7 @@ void Darwin::addProfileRTLibs(const ArgList &Args, addExportedSymbol(CmdArgs, "_reset_fn_list"); } - // Align __llvm_prf_{cnts,data} sections to the maximum expected page + // Align __llvm_prf_{cnts,bits,data} sections to the maximum expected page // alignment. This allows profile counters to be mmap()'d to disk. Note that // it's not enough to just page-align __llvm_prf_cnts: the following section // must also be page-aligned so that its data is not clobbered by mmap(). @@ -1415,7 +1415,7 @@ void Darwin::addProfileRTLibs(const ArgList &Args, // extra alignment also allows the same binary to be used with/without sync // enabled. 
if (!ForGCOV) { - for (auto IPSK : {llvm::IPSK_cnts, llvm::IPSK_data}) { + for (auto IPSK : {llvm::IPSK_cnts, llvm::IPSK_bitmap, llvm::IPSK_data}) { addSectalignToPage( Args, CmdArgs, "__DATA", llvm::getInstrProfSectionName(IPSK, llvm::Triple::MachO, diff --git a/clang/test/Driver/darwin-ld.c b/clang/test/Driver/darwin-ld.c index 13ec69b2db1abc2fa3a304caaca78ec7afc473ad..4e2a408aa742c7f33e8e02d2f7b6c86632ae335d 100644 --- a/clang/test/Driver/darwin-ld.c +++ b/clang/test/Driver/darwin-ld.c @@ -336,7 +336,7 @@ // RUN: FileCheck -check-prefix=PROFILE_SECTALIGN %s < %t.log // RUN: %clang -target arm64-apple-ios12 -fprofile-instr-generate -### %t.o 2> %t.log // RUN: FileCheck -check-prefix=PROFILE_SECTALIGN %s < %t.log -// PROFILE_SECTALIGN: "-sectalign" "__DATA" "__llvm_prf_cnts" "0x4000" "-sectalign" "__DATA" "__llvm_prf_data" "0x4000" +// PROFILE_SECTALIGN: "-sectalign" "__DATA" "__llvm_prf_cnts" "0x4000" "-sectalign" "__DATA" "__llvm_prf_bits" "0x4000" "-sectalign" "__DATA" "__llvm_prf_data" "0x4000" // RUN: %clang -target x86_64-apple-darwin12 -fprofile-instr-generate --coverage -### %t.o 2> %t.log // RUN: FileCheck -check-prefix=NO_PROFILE_EXPORT %s < %t.log diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index 94261f4705b96c4ba17d81da46ddbead1815c873..ea5330254e2901d88ccefbe8fd84e423c8204fce 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -76,6 +76,7 @@ INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \ ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \ Inc->getHash()->getZExtValue())) INSTR_PROF_DATA(const IntPtrT, IntPtrTy, CounterPtr, RelativeCounterPtr) +INSTR_PROF_DATA(const IntPtrT, IntPtrTy, BitmapPtr, RelativeBitmapPtr) /* This is used to map function pointers for the indirect call targets to * function name hashes during the conversion from raw to merged profile * data. 
@@ -87,10 +88,31 @@ INSTR_PROF_DATA(IntPtrT, llvm::Type::getInt8PtrTy(Ctx), Values, \ INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumCounters, \ ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumCounters)) INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \ - ConstantArray::get(Int16ArrayTy, Int16ArrayVals)) + ConstantArray::get(Int16ArrayTy, Int16ArrayVals)) \ +INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumBitmapBytes, \ + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumBitmapBytes)) #undef INSTR_PROF_DATA /* INSTR_PROF_DATA end. */ +/* For a virtual table object, record the name hash to associate profiled + * addresses with global variables, and record {starting address, size in bytes} + * to map the profiled virtual table (which usually have an offset from the + * starting address) back to a virtual table object. */ +#ifndef INSTR_PROF_VTABLE_DATA +#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer) +#else +#define INSTR_PROF_VTABLE_DATA_DEFINED +#endif +INSTR_PROF_VTABLE_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), \ + VTableNameHash, ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \ + IndexedInstrProf::ComputeHash(PGOVTableName))) +INSTR_PROF_VTABLE_DATA(const IntPtrT, llvm::PointerType::getUnqual(Ctx), \ + VTablePointer, VTableAddr) +INSTR_PROF_VTABLE_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), VTableSize, \ + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), \ + VTableSizeVal)) +#undef INSTR_PROF_VTABLE_DATA +/* INSTR_PROF_VTABLE_DATA end. */ /* This is an internal data structure used by value profiler. 
It * is defined here to allow serialization code sharing by LLVM @@ -128,16 +150,20 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \ INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) -/* FIXME: A more accurate name is NumData */ -INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) +INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters) -/* FIXME: A more accurate name is NumCounters */ -INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) +INSTR_PROF_RAW_HEADER(uint64_t, NumCounters, NumCounters) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesAfterCounters, PaddingBytesAfterCounters) +INSTR_PROF_RAW_HEADER(uint64_t, NumBitmapBytes, NumBitmapBytes) +INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesAfterBitmapBytes, PaddingBytesAfterBitmapBytes) INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin - (uintptr_t)DataBegin) +INSTR_PROF_RAW_HEADER(uint64_t, BitmapDelta, + (uintptr_t)BitmapBegin - (uintptr_t)DataBegin) INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) +INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables) +INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize) INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) #undef INSTR_PROF_RAW_HEADER /* INSTR_PROF_RAW_HEADER end */ @@ -179,13 +205,26 @@ VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0, "indirect call target") /* For memory intrinsic functions size profiling. 
*/ VALUE_PROF_KIND(IPVK_MemOPSize, 1, "memory intrinsic functions size") +/* For virtual table address profiling, the address point of the virtual table + * (i.e., the address contained in objects pointing to a virtual table) are + * profiled. Note this may not be the address of the per C++ class virtual table + * object (e.g., there might be an offset). + * + * The profiled addresses are stored in raw profile, together with the following + * two types of information. + * 1. The (starting and ending) addresses of per C++ class virtual table objects. + * 2. The (compressed) virtual table object names. + * RawInstrProfReader converts profiled virtual table addresses to virtual table + * objects' MD5 hash. + */ +VALUE_PROF_KIND(IPVK_VTableTarget, 2, "The profiled address point of the vtable") /* These two kinds must be the last to be * declared. This is to make sure the string * array created with the template can be * indexed with the kind value. */ VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget, "first") -VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize, "last") +VALUE_PROF_KIND(IPVK_Last, IPVK_VTableTarget, "last") #undef VALUE_PROF_KIND /* VALUE_PROF_KIND end */ @@ -269,15 +308,24 @@ INSTR_PROF_SECT_ENTRY(IPSK_data, \ INSTR_PROF_SECT_ENTRY(IPSK_cnts, \ INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON), \ INSTR_PROF_CNTS_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_bitmap, \ + INSTR_PROF_QUOTE(INSTR_PROF_BITS_COMMON), \ + INSTR_PROF_BITS_COFF, "__DATA,") INSTR_PROF_SECT_ENTRY(IPSK_name, \ INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \ INSTR_PROF_NAME_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_vname, \ + INSTR_PROF_QUOTE(INSTR_PROF_VNAME_COMMON), \ + INSTR_PROF_VNAME_COFF, "__DATA,") INSTR_PROF_SECT_ENTRY(IPSK_vals, \ INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON), \ INSTR_PROF_VALS_COFF, "__DATA,") INSTR_PROF_SECT_ENTRY(IPSK_vnodes, \ INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON), \ INSTR_PROF_VNODES_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_vtab, \ + 
INSTR_PROF_QUOTE(INSTR_PROF_VTAB_COMMON), \ + INSTR_PROF_VTAB_COFF, "__DATA,") INSTR_PROF_SECT_ENTRY(IPSK_covmap, \ INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON), \ INSTR_PROF_COVMAP_COFF, "__LLVM_COV,") @@ -287,6 +335,12 @@ INSTR_PROF_SECT_ENTRY(IPSK_covfun, \ INSTR_PROF_SECT_ENTRY(IPSK_orderfile, \ INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_COMMON), \ INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_COFF), "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_covdata, \ + INSTR_PROF_QUOTE(INSTR_PROF_COVDATA_COMMON), \ + INSTR_PROF_COVDATA_COFF, "__LLVM_COV,") +INSTR_PROF_SECT_ENTRY(IPSK_covname, \ + INSTR_PROF_QUOTE(INSTR_PROF_COVNAME_COMMON), \ + INSTR_PROF_COVNAME_COFF, "__LLVM_COV,") #undef INSTR_PROF_SECT_ENTRY #endif @@ -646,17 +700,16 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | \ (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129 -/* FIXME: Please remedy the fixme in the header before bumping the version. */ /* Raw profile format version (start from 1). */ -#define INSTR_PROF_RAW_VERSION 8 +#define INSTR_PROF_RAW_VERSION 10 /* Indexed profile format version (start from 1). */ -#define INSTR_PROF_INDEX_VERSION 10 +#define INSTR_PROF_INDEX_VERSION 12 /* Coverage mapping format version (start from 0). */ -#define INSTR_PROF_COVMAP_VERSION 5 +#define INSTR_PROF_COVMAP_VERSION 6 -/* Profile version is always of type uint64_t. Reserve the upper 8 bits in the - * version for other variants of profile. We set the lowest bit of the upper 8 - * bits (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentation +/* Profile version is always of type uint64_t. Reserve the upper 32 bits in the + * version for other variants of profile. We set the 8th most significant bit + * (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentation * generated profile, and 0 if this is a Clang FE generated profile. * 1 in bit 57 indicates there are context-sensitive records in the profile. 
* The 59th bit indicates whether to use debug info to correlate profiles. @@ -665,7 +718,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, * The 62nd bit indicates whether memory profile information is present. * The 63rd bit indicates if this is a temporal profile. */ -#define VARIANT_MASKS_ALL 0xff00000000000000ULL +#define VARIANT_MASKS_ALL 0xffffffff00000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) #define VARIANT_MASK_IR_PROF (0x1ULL << 56) #define VARIANT_MASK_CSIR_PROF (0x1ULL << 57) @@ -688,22 +741,35 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, than WIN32 */ #define INSTR_PROF_DATA_COMMON __llvm_prf_data #define INSTR_PROF_NAME_COMMON __llvm_prf_names +#define INSTR_PROF_VNAME_COMMON __llvm_prf_vns #define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts +#define INSTR_PROF_BITS_COMMON __llvm_prf_bits #define INSTR_PROF_VALS_COMMON __llvm_prf_vals #define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds +#define INSTR_PROF_VTAB_COMMON __llvm_prf_vtab #define INSTR_PROF_COVMAP_COMMON __llvm_covmap #define INSTR_PROF_COVFUN_COMMON __llvm_covfun +#define INSTR_PROF_COVDATA_COMMON __llvm_covdata +#define INSTR_PROF_COVNAME_COMMON __llvm_covnames #define INSTR_PROF_ORDERFILE_COMMON __llvm_orderfile /* Windows section names. Because these section names contain dollar characters, * they must be quoted. */ #define INSTR_PROF_DATA_COFF ".lprfd$M" #define INSTR_PROF_NAME_COFF ".lprfn$M" +#define INSTR_PROF_VNAME_COFF ".lprfvn$M" #define INSTR_PROF_CNTS_COFF ".lprfc$M" +#define INSTR_PROF_BITS_COFF ".lprfb$M" #define INSTR_PROF_VALS_COFF ".lprfv$M" #define INSTR_PROF_VNODES_COFF ".lprfnd$M" +#define INSTR_PROF_VTAB_COFF ".lprfvt$M" #define INSTR_PROF_COVMAP_COFF ".lcovmap$M" #define INSTR_PROF_COVFUN_COFF ".lcovfun$M" +/* Since cov data and cov names sections are not allocated, we don't need to + * access them at runtime. 
+ */ +#define INSTR_PROF_COVDATA_COFF ".lcovd" +#define INSTR_PROF_COVNAME_COFF ".lcovn" #define INSTR_PROF_ORDERFILE_COFF ".lorderfile$M" #ifdef _WIN32 @@ -711,6 +777,9 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_DATA_COFF #define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_NAME_COFF #define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_CNTS_COFF +#define INSTR_PROF_BITS_SECT_NAME INSTR_PROF_BITS_COFF +#define INSTR_PROF_VTAB_SECT_NAME INSTR_PROF_VTAB_COFF +#define INSTR_PROF_VNAME_SECT_NAME INSTR_PROF_VNAME_COFF /* Array of pointers. Each pointer points to a list * of value nodes associated with one value site. */ @@ -719,12 +788,17 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_VNODES_COFF #define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_COVMAP_COFF #define INSTR_PROF_COVFUN_SECT_NAME INSTR_PROF_COVFUN_COFF +#define INSTR_PROF_COVDATA_SECT_NAME INSTR_PROF_COVDATA_COFF +#define INSTR_PROF_COVNAME_SECT_NAME INSTR_PROF_COVNAME_COFF #define INSTR_PROF_ORDERFILE_SECT_NAME INSTR_PROF_ORDERFILE_COFF #else /* Runtime section names and name strings. */ #define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_DATA_COMMON) #define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON) #define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON) +#define INSTR_PROF_BITS_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_BITS_COMMON) +#define INSTR_PROF_VTAB_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_VTAB_COMMON) +#define INSTR_PROF_VNAME_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_VNAME_COMMON) /* Array of pointers. Each pointer points to a list * of value nodes associated with one value site. 
*/ @@ -733,6 +807,8 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON) #define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON) #define INSTR_PROF_COVFUN_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_COVFUN_COMMON) +#define INSTR_PROF_COVDATA_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_COVDATA_COMMON) +#define INSTR_PROF_COVNAME_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_COVNAME_COMMON) /* Order file instrumentation. */ #define INSTR_PROF_ORDERFILE_SECT_NAME \ INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_COMMON) diff --git a/compiler-rt/lib/profile/InstrProfiling.c b/compiler-rt/lib/profile/InstrProfiling.c index 0dd5ff5ae6331cbbda44387840d24e283acffbe1..7d69e37815c948f1e9eea8421e462eb032a40e15 100644 --- a/compiler-rt/lib/profile/InstrProfiling.c +++ b/compiler-rt/lib/profile/InstrProfiling.c @@ -60,6 +60,10 @@ COMPILER_RT_VISIBILITY void __llvm_profile_reset_counters(void) { (__llvm_profile_get_version() & VARIANT_MASK_BYTE_COVERAGE) ? 
0xFF : 0; memset(I, ResetValue, E - I); + I = __llvm_profile_begin_bitmap(); + E = __llvm_profile_end_bitmap(); + memset(I, 0x0, E - I); + const __llvm_profile_data *DataBegin = __llvm_profile_begin_data(); const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); const __llvm_profile_data *DI; @@ -85,3 +89,7 @@ COMPILER_RT_VISIBILITY void __llvm_profile_reset_counters(void) { } lprofSetProfileDumped(0); } + +inline int hasCorrelation() { + return (__llvm_profile_get_version() & VARIANT_MASK_DBG_CORRELATE) != 0ULL; +} diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h index 4433d7bd48871fcdcaf984b45767c4168cce3b5b..67b410876ef485f4c75dbff50e843651ed341966 100644 --- a/compiler-rt/lib/profile/InstrProfiling.h +++ b/compiler-rt/lib/profile/InstrProfiling.h @@ -38,6 +38,12 @@ typedef struct ValueProfNode { #include "profile/InstrProfData.inc" } ValueProfNode; +typedef void *IntPtrT; +typedef struct COMPILER_RT_ALIGNAS(INSTR_PROF_DATA_ALIGNMENT) VTableProfData { +#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer) Type Name; +#include "profile/InstrProfData.inc" +} VTableProfData; + /*! * \brief Return 1 if profile counters are continuously synced to the raw * profile via an mmap(). 
This is in contrast to the default mode, in which @@ -86,10 +92,16 @@ const __llvm_profile_data *__llvm_profile_begin_data(void); const __llvm_profile_data *__llvm_profile_end_data(void); const char *__llvm_profile_begin_names(void); const char *__llvm_profile_end_names(void); +const char *__llvm_profile_begin_vtabnames(void); +const char *__llvm_profile_end_vtabnames(void); char *__llvm_profile_begin_counters(void); char *__llvm_profile_end_counters(void); +char *__llvm_profile_begin_bitmap(void); +char *__llvm_profile_end_bitmap(void); ValueProfNode *__llvm_profile_begin_vnodes(); ValueProfNode *__llvm_profile_end_vnodes(); +const VTableProfData *__llvm_profile_begin_vtables(); +const VTableProfData *__llvm_profile_end_vtables(); uint32_t *__llvm_profile_begin_orderfile(); /*! @@ -101,11 +113,11 @@ void __llvm_profile_reset_counters(void); /*! * \brief Merge profile data from buffer. * - * Read profile data form buffer \p Profile and merge with in-process profile - * counters. The client is expected to have checked or already knows the profile - * data in the buffer matches the in-process counter structure before calling - * it. Returns 0 (success) if the profile data is valid. Upon reading - * invalid/corrupted profile data, returns 1 (failure). + * Read profile data from buffer \p Profile and merge with in-process profile + * counters and bitmaps. The client is expected to have checked or already + * know the profile data in the buffer matches the in-process counter + * structure before calling it. Returns 0 (success) if the profile data is + * valid. Upon reading invalid/corrupted profile data, returns 1 (failure). 
*/ int __llvm_profile_merge_from_buffer(const char *Profile, uint64_t Size); @@ -113,8 +125,8 @@ int __llvm_profile_merge_from_buffer(const char *Profile, uint64_t Size); * * Returns 0 (success) if the profile data in buffer \p Profile with size * \p Size was generated by the same binary and therefore matches - * structurally the in-process counters. If the profile data in buffer is - * not compatible, the interface returns 1 (failure). + * structurally the in-process counters and bitmaps. If the profile data in + * buffer is not compatible, the interface returns 1 (failure). */ int __llvm_profile_check_compatibility(const char *Profile, uint64_t Size); @@ -259,6 +271,9 @@ uint64_t __llvm_profile_get_magic(void); /*! \brief Get the version of the file format. */ uint64_t __llvm_profile_get_version(void); +/*! \brief If the binary is compiled with profile correlation. */ +int hasCorrelation(); + /*! \brief Get the number of entries in the profile data section. */ uint64_t __llvm_profile_get_num_data(const __llvm_profile_data *Begin, const __llvm_profile_data *End); @@ -276,19 +291,38 @@ uint64_t __llvm_profile_get_num_counters(const char *Begin, const char *End); /*! \brief Get the size of the profile counters section in bytes. */ uint64_t __llvm_profile_get_counters_size(const char *Begin, const char *End); -/* ! \brief Given the sizes of the data and counter information, return the - * number of padding bytes before and after the counters, and after the names, - * in the raw profile. +/*! \brief Get the number of bytes in the profile bitmap section. */ +uint64_t __llvm_profile_get_num_bitmap_bytes(const char *Begin, + const char *End); + +/*! \brief Get the size of the profile name section in bytes. */ +uint64_t __llvm_profile_get_name_size(const char *Begin, const char *End); + +/*! \brief Get the number of virtual table profile data entries */ +uint64_t __llvm_profile_get_num_vtable(const VTableProfData *Begin, + const VTableProfData *End); + +/*! 
\brief Get the size of virtual table profile data in bytes. */ +uint64_t __llvm_profile_get_vtable_section_size(const VTableProfData *Begin, + const VTableProfData *End); + +/* ! \brief Given the sizes of the data and counter information, computes the + * number of padding bytes before and after the counter section, as well as the + * number of padding bytes after other sections in the raw profile. + * Returns -1 upon errors and 0 upon success. Output parameters should be used + * iff return value is 0. * * Note: When mmap() mode is disabled, no padding bytes before/after counters * are needed. However, in mmap() mode, the counter section in the raw profile * must be page-aligned: this API computes the number of padding bytes * needed to achieve that. */ -void __llvm_profile_get_padding_sizes_for_counters( - uint64_t DataSize, uint64_t CountersSize, uint64_t NamesSize, +int __llvm_profile_get_padding_sizes_for_counters( + uint64_t DataSize, uint64_t CountersSize, uint64_t NumBitmapBytes, + uint64_t NamesSize, uint64_t VTableSize, uint64_t VNameSize, uint64_t *PaddingBytesBeforeCounters, uint64_t *PaddingBytesAfterCounters, - uint64_t *PaddingBytesAfterNames); + uint64_t *PaddingBytesAfterBitmap, uint64_t *PaddingBytesAfterNames, + uint64_t *PaddingBytesAfterVTable, uint64_t *PaddingBytesAfterVNames); /*! * \brief Set the flag that profile data has been dumped to the file. 
diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c index 61ac5d9c02850023d0a569ae574bf6ea6ec441e4..f72ba709fc0e94ebbea7b577a1324ecf54245377 100644 --- a/compiler-rt/lib/profile/InstrProfilingBuffer.c +++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c @@ -43,17 +43,35 @@ uint64_t __llvm_profile_get_size_for_buffer(void) { const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); const char *CountersBegin = __llvm_profile_begin_counters(); const char *CountersEnd = __llvm_profile_end_counters(); + const char *BitmapBegin = __llvm_profile_begin_bitmap(); + const char *BitmapEnd = __llvm_profile_end_bitmap(); const char *NamesBegin = __llvm_profile_begin_names(); const char *NamesEnd = __llvm_profile_end_names(); + const VTableProfData *VTableBegin = __llvm_profile_begin_vtables(); + const VTableProfData *VTableEnd = __llvm_profile_end_vtables(); + const char *VNamesBegin = __llvm_profile_begin_vtabnames(); + const char *VNamesEnd = __llvm_profile_end_vtabnames(); return __llvm_profile_get_size_for_buffer_internal( - DataBegin, DataEnd, CountersBegin, CountersEnd, NamesBegin, NamesEnd); + DataBegin, DataEnd, CountersBegin, CountersEnd, BitmapBegin, BitmapEnd, + NamesBegin, NamesEnd, VTableBegin, VTableEnd, VNamesBegin, VNamesEnd); } COMPILER_RT_VISIBILITY uint64_t __llvm_profile_get_num_data(const __llvm_profile_data *Begin, const __llvm_profile_data *End) { + if (hasCorrelation()) + return 0; intptr_t BeginI = (intptr_t)Begin, EndI = (intptr_t)End; + // `sizeof(__llvm_profile_data) - 1` is required in the numerator when + // [Begin, End] represents an inclusive range. + // For ELF, [Begin, End) represents the address of linker-inserted + // symbols `__start__` and `__stop_`. + // Thereby, `End` is one byte past the inclusive range, and + // `sizeof(__llvm_profile_data) - 1` is not necessary in the numerator to get + // the correct number of profile data. 
+ // FIXME: Consider removing `sizeof(__llvm_profile_data) - 1` if this is true + // across platforms. return ((EndI + sizeof(__llvm_profile_data) - 1) - BeginI) / sizeof(__llvm_profile_data); } @@ -61,9 +79,31 @@ uint64_t __llvm_profile_get_num_data(const __llvm_profile_data *Begin, COMPILER_RT_VISIBILITY uint64_t __llvm_profile_get_data_size(const __llvm_profile_data *Begin, const __llvm_profile_data *End) { + if (hasCorrelation()) + return 0; return __llvm_profile_get_num_data(Begin, End) * sizeof(__llvm_profile_data); } +// Counts the number of `VTableProfData` elements within the range of [Begin, +// End). Caller should guarantee that End points to one byte past the inclusive +// range. +// FIXME: Add a compiler-rt test to make sure the number of vtables in the +// raw profile is the same as the number of vtable elements in the instrumented +// binary. +COMPILER_RT_VISIBILITY +uint64_t __llvm_profile_get_num_vtable(const VTableProfData *Begin, + const VTableProfData *End) { + // Convert pointers to intptr_t to use integer arithmetic. 
+ intptr_t EndI = (intptr_t)End, BeginI = (intptr_t)Begin; + return (EndI - BeginI) / sizeof(VTableProfData); +} + +COMPILER_RT_VISIBILITY +uint64_t __llvm_profile_get_vtable_section_size(const VTableProfData *Begin, + const VTableProfData *End) { + return (intptr_t)(End) - (intptr_t)(Begin); +} + COMPILER_RT_VISIBILITY size_t __llvm_profile_counter_entry_size(void) { if (__llvm_profile_get_version() & VARIANT_MASK_BYTE_COVERAGE) return sizeof(uint8_t); @@ -83,6 +123,19 @@ uint64_t __llvm_profile_get_counters_size(const char *Begin, const char *End) { __llvm_profile_counter_entry_size(); } +COMPILER_RT_VISIBILITY +uint64_t __llvm_profile_get_num_bitmap_bytes(const char *Begin, + const char *End) { + return (End - Begin); +} + +COMPILER_RT_VISIBILITY +uint64_t __llvm_profile_get_name_size(const char *Begin, const char *End) { + if (hasCorrelation()) + return 0; + return End - Begin; +} + /// Calculate the number of padding bytes needed to add to \p Offset in order /// for (\p Offset + Padding) to be page-aligned. static uint64_t calculateBytesNeededToPageAlign(uint64_t Offset) { @@ -101,48 +154,87 @@ static int needsCounterPadding(void) { } COMPILER_RT_VISIBILITY -void __llvm_profile_get_padding_sizes_for_counters( - uint64_t DataSize, uint64_t CountersSize, uint64_t NamesSize, +int __llvm_profile_get_padding_sizes_for_counters( + uint64_t DataSize, uint64_t CountersSize, uint64_t NumBitmapBytes, + uint64_t NamesSize, uint64_t VTableSize, uint64_t VNameSize, uint64_t *PaddingBytesBeforeCounters, uint64_t *PaddingBytesAfterCounters, - uint64_t *PaddingBytesAfterNames) { + uint64_t *PaddingBytesAfterBitmapBytes, uint64_t *PaddingBytesAfterNames, + uint64_t *PaddingBytesAfterVTable, uint64_t *PaddingBytesAfterVName) { + // Counter padding is needed only if continuous mode is enabled. 
if (!needsCounterPadding()) { *PaddingBytesBeforeCounters = 0; *PaddingBytesAfterCounters = __llvm_profile_get_num_padding_bytes(CountersSize); + *PaddingBytesAfterBitmapBytes = + __llvm_profile_get_num_padding_bytes(NumBitmapBytes); *PaddingBytesAfterNames = __llvm_profile_get_num_padding_bytes(NamesSize); - return; + if (PaddingBytesAfterVTable != NULL) + *PaddingBytesAfterVTable = + __llvm_profile_get_num_padding_bytes(VTableSize); + if (PaddingBytesAfterVName != NULL) + *PaddingBytesAfterVName = __llvm_profile_get_num_padding_bytes(VNameSize); + return 0; } + // Value profiling not supported in continuous mode at profile-write time. + // Return -1 to alert the incompatibility. + if (VTableSize != 0 || VNameSize != 0) + return -1; + // In continuous mode, the file offsets for headers and for the start of // counter sections need to be page-aligned. *PaddingBytesBeforeCounters = calculateBytesNeededToPageAlign(sizeof(__llvm_profile_header) + DataSize); *PaddingBytesAfterCounters = calculateBytesNeededToPageAlign(CountersSize); + *PaddingBytesAfterBitmapBytes = + calculateBytesNeededToPageAlign(NumBitmapBytes); *PaddingBytesAfterNames = calculateBytesNeededToPageAlign(NamesSize); + // Set these two variables to zero to avoid uninitialized variables + // even if VTableSize and VNameSize are known to be zero. 
+ if (PaddingBytesAfterVTable != NULL) + *PaddingBytesAfterVTable = 0; + if (PaddingBytesAfterVName != NULL) + *PaddingBytesAfterVName = 0; + return 0; } COMPILER_RT_VISIBILITY uint64_t __llvm_profile_get_size_for_buffer_internal( const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd, - const char *CountersBegin, const char *CountersEnd, const char *NamesBegin, - const char *NamesEnd) { + const char *CountersBegin, const char *CountersEnd, const char *BitmapBegin, + const char *BitmapEnd, const char *NamesBegin, const char *NamesEnd, + const VTableProfData *VTableBegin, const VTableProfData *VTableEnd, + const char *VNamesBegin, const char *VNamesEnd) { /* Match logic in __llvm_profile_write_buffer(). */ const uint64_t NamesSize = (NamesEnd - NamesBegin) * sizeof(char); uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd); uint64_t CountersSize = __llvm_profile_get_counters_size(CountersBegin, CountersEnd); + const uint64_t NumBitmapBytes = + __llvm_profile_get_num_bitmap_bytes(BitmapBegin, BitmapEnd); + const uint64_t VTableSize = + __llvm_profile_get_vtable_section_size(VTableBegin, VTableEnd); + const uint64_t VNameSize = + __llvm_profile_get_name_size(VNamesBegin, VNamesEnd); /* Determine how much padding is needed before/after the counters and after * the names. 
*/ uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters, - PaddingBytesAfterNames; + PaddingBytesAfterNames, PaddingBytesAfterBitmapBytes, + PaddingBytesAfterVTable, PaddingBytesAfterVNames; __llvm_profile_get_padding_sizes_for_counters( - DataSize, CountersSize, NamesSize, &PaddingBytesBeforeCounters, - &PaddingBytesAfterCounters, &PaddingBytesAfterNames); + DataSize, CountersSize, NumBitmapBytes, NamesSize, 0 /* VTableSize */, + 0 /* VNameSize */, &PaddingBytesBeforeCounters, + &PaddingBytesAfterCounters, &PaddingBytesAfterBitmapBytes, + &PaddingBytesAfterNames, &PaddingBytesAfterVTable, + &PaddingBytesAfterVNames); return sizeof(__llvm_profile_header) + __llvm_write_binary_ids(NULL) + DataSize + PaddingBytesBeforeCounters + CountersSize + - PaddingBytesAfterCounters + NamesSize + PaddingBytesAfterNames; + PaddingBytesAfterCounters + NumBitmapBytes + + PaddingBytesAfterBitmapBytes + NamesSize + PaddingBytesAfterNames + + VTableSize + PaddingBytesAfterVTable + VNameSize + + PaddingBytesAfterVNames; } COMPILER_RT_VISIBILITY @@ -160,9 +252,14 @@ COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer(char *Buffer) { COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer_internal( char *Buffer, const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd, const char *CountersBegin, - const char *CountersEnd, const char *NamesBegin, const char *NamesEnd) { + const char *CountersEnd, const char *BitmapBegin, const char *BitmapEnd, + const char *NamesBegin, const char *NamesEnd) { ProfDataWriter BufferWriter; initBufferWriter(&BufferWriter, Buffer); - return lprofWriteDataImpl(&BufferWriter, DataBegin, DataEnd, CountersBegin, - CountersEnd, 0, NamesBegin, NamesEnd, 0); + // Set virtual table arguments to NULL since they are not supported yet. 
+ return lprofWriteDataImpl( + &BufferWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, + BitmapBegin, BitmapEnd, /*VPDataReader=*/0, NamesBegin, NamesEnd, + /*VTableBegin=*/NULL, /*VTableEnd=*/NULL, /*VNamesBegin=*/NULL, + /*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0); } diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index 2bd6a49ce065441715b00a7de30f95e6aa5568c0..6efee9fe2c1b68f3ed25be2cd056d8db9197ed39 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -108,14 +108,18 @@ static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) { const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); const char *CountersBegin = __llvm_profile_begin_counters(); const char *CountersEnd = __llvm_profile_end_counters(); + const char *BitmapBegin = __llvm_profile_begin_bitmap(); + const char *BitmapEnd = __llvm_profile_end_bitmap(); const char *NamesBegin = __llvm_profile_begin_names(); const char *NamesEnd = __llvm_profile_end_names(); const uint64_t NamesSize = (NamesEnd - NamesBegin) * sizeof(char); uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd); uint64_t CountersSize = __llvm_profile_get_counters_size(CountersBegin, CountersEnd); + uint64_t NumBitmapBytes = + __llvm_profile_get_num_bitmap_bytes(BitmapBegin, BitmapEnd); - /* Check that the counter and data sections in this image are + /* Check that the counter, bitmap, and data sections in this image are * page-aligned. 
*/ unsigned PageSize = getpagesize(); if ((intptr_t)CountersBegin % PageSize != 0) { @@ -123,19 +127,28 @@ static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) { CountersBegin, PageSize); return 1; } + if ((intptr_t)BitmapBegin % PageSize != 0) { + PROF_ERR("Bitmap section not page-aligned (start = %p, pagesz = %u).\n", + BitmapBegin, PageSize); + return 1; + } if ((intptr_t)DataBegin % PageSize != 0) { PROF_ERR("Data section not page-aligned (start = %p, pagesz = %u).\n", DataBegin, PageSize); return 1; } + int Fileno = fileno(File); /* Determine how much padding is needed before/after the counters and * after the names. */ uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters, - PaddingBytesAfterNames; + PaddingBytesAfterNames, PaddingBytesAfterBitmapBytes, + PaddingBytesAfterVTable, PaddingBytesAfterVNames; __llvm_profile_get_padding_sizes_for_counters( - DataSize, CountersSize, NamesSize, &PaddingBytesBeforeCounters, - &PaddingBytesAfterCounters, &PaddingBytesAfterNames); + DataSize, CountersSize, NumBitmapBytes, NamesSize, /*VTableSize=*/0, + /*VNameSize=*/0, &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters, + &PaddingBytesAfterBitmapBytes, &PaddingBytesAfterNames, + &PaddingBytesAfterVTable, &PaddingBytesAfterVNames); uint64_t PageAlignedCountersLength = CountersSize + PaddingBytesAfterCounters; uint64_t FileOffsetToCounters = CurrentFileOffset + @@ -155,6 +168,31 @@ static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) { FileOffsetToCounters); return 1; } + + /* Also mmap MCDC bitmap bytes. If there aren't any bitmap bytes, mmap() + * will fail with EINVAL. 
*/ + if (NumBitmapBytes == 0) + return 0; + + uint64_t PageAlignedBitmapLength = + NumBitmapBytes + PaddingBytesAfterBitmapBytes; + uint64_t FileOffsetToBitmap = + CurrentFileOffset + sizeof(__llvm_profile_header) + DataSize + + PaddingBytesBeforeCounters + CountersSize + PaddingBytesAfterCounters; + void *BitmapMmap = + mmap((void *)BitmapBegin, PageAlignedBitmapLength, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, Fileno, FileOffsetToBitmap); + if (BitmapMmap != BitmapBegin) { + PROF_ERR( + "Continuous counter sync mode is enabled, but mmap() failed (%s).\n" + " - BitmapBegin: %p\n" + " - PageAlignedBitmapLength: %" PRIu64 "\n" + " - Fileno: %d\n" + " - FileOffsetToBitmap: %" PRIu64 "\n", + strerror(errno), BitmapBegin, PageAlignedBitmapLength, Fileno, + FileOffsetToBitmap); + return 1; + } return 0; } #elif defined(__ELF__) || defined(_WIN32) @@ -197,6 +235,8 @@ static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) { const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); const char *CountersBegin = __llvm_profile_begin_counters(); const char *CountersEnd = __llvm_profile_end_counters(); + const char *BitmapBegin = __llvm_profile_begin_bitmap(); + const char *BitmapEnd = __llvm_profile_end_bitmap(); uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd); /* Get the file size. */ uint64_t FileSize = 0; @@ -218,6 +258,11 @@ static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) { /* Return the memory allocated for counters to OS. */ lprofReleaseMemoryPagesToOS((uintptr_t)CountersBegin, (uintptr_t)CountersEnd); + + /* BIAS MODE not supported yet for Bitmap (MCDC). */ + + /* Return the memory allocated for the bitmap to OS.
*/ + lprofReleaseMemoryPagesToOS((uintptr_t)BitmapBegin, (uintptr_t)BitmapEnd); return 0; } #else diff --git a/compiler-rt/lib/profile/InstrProfilingInternal.h b/compiler-rt/lib/profile/InstrProfilingInternal.h index 360165e32ab3fe2c6da0d3df053e587382ce55c1..d5bd0e41fb129174dda61859ef3b3b168dbf0783 100644 --- a/compiler-rt/lib/profile/InstrProfilingInternal.h +++ b/compiler-rt/lib/profile/InstrProfilingInternal.h @@ -21,8 +21,10 @@ */ uint64_t __llvm_profile_get_size_for_buffer_internal( const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd, - const char *CountersBegin, const char *CountersEnd, const char *NamesBegin, - const char *NamesEnd); + const char *CountersBegin, const char *CountersEnd, const char *BitmapBegin, + const char *BitmapEnd, const char *NamesBegin, const char *NamesEnd, + const VTableProfData *VTableBegin, const VTableProfData *VTableEnd, + const char *VNamesBegin, const char *VNamesEnd); /*! * \brief Write instrumentation data to the given buffer, given explicit @@ -36,7 +38,8 @@ uint64_t __llvm_profile_get_size_for_buffer_internal( int __llvm_profile_write_buffer_internal( char *Buffer, const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd, const char *CountersBegin, - const char *CountersEnd, const char *NamesBegin, const char *NamesEnd); + const char *CountersEnd, const char *BitmapBegin, const char *BitmapEnd, + const char *NamesBegin, const char *NamesEnd); /*! 
* The data structure describing the data to be written by the @@ -153,8 +156,11 @@ int lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd, const char *CountersBegin, const char *CountersEnd, + const char *BitmapBegin, const char *BitmapEnd, VPDataReaderType *VPDataReader, const char *NamesBegin, - const char *NamesEnd, int SkipNameDataWrite); + const char *NamesEnd, const VTableProfData *VTableBegin, + const VTableProfData *VTableEnd, const char *VNamesBegin, + const char *VNamesEnd, int SkipNameDataWrite); /* Merge value profile data pointed to by SrcValueProfData into * in-memory profile counters pointed by to DstData. */ diff --git a/compiler-rt/lib/profile/InstrProfilingMerge.c b/compiler-rt/lib/profile/InstrProfilingMerge.c index 432e824955f8a7e007bc25dcfedabe3b6a383312..2dcf828fcf64e4418ed2d7a3ed57631dd5836c4a 100644 --- a/compiler-rt/lib/profile/InstrProfilingMerge.c +++ b/compiler-rt/lib/profile/InstrProfilingMerge.c @@ -53,7 +53,7 @@ int __llvm_profile_check_compatibility(const char *ProfileData, SrcDataStart = (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header) + Header->BinaryIdsSize); - SrcDataEnd = SrcDataStart + Header->DataSize; + SrcDataEnd = SrcDataStart + Header->NumData; if (ProfileSize < sizeof(__llvm_profile_header)) return 1; @@ -61,21 +61,26 @@ int __llvm_profile_check_compatibility(const char *ProfileData, /* Check the header first. 
*/ if (Header->Magic != __llvm_profile_get_magic() || Header->Version != __llvm_profile_get_version() || - Header->DataSize != + Header->NumData != __llvm_profile_get_num_data(__llvm_profile_begin_data(), __llvm_profile_end_data()) || - Header->CountersSize != + Header->NumCounters != __llvm_profile_get_num_counters(__llvm_profile_begin_counters(), __llvm_profile_end_counters()) || - Header->NamesSize != (uint64_t)(__llvm_profile_end_names() - - __llvm_profile_begin_names()) || + Header->NumBitmapBytes != + __llvm_profile_get_num_bitmap_bytes(__llvm_profile_begin_bitmap(), + __llvm_profile_end_bitmap()) || + Header->NamesSize != + __llvm_profile_get_name_size(__llvm_profile_begin_names(), + __llvm_profile_end_names()) || Header->ValueKindLast != IPVK_Last) return 1; if (ProfileSize < sizeof(__llvm_profile_header) + Header->BinaryIdsSize + - Header->DataSize * sizeof(__llvm_profile_data) + Header->NamesSize + - Header->CountersSize * __llvm_profile_counter_entry_size()) + Header->NumData * sizeof(__llvm_profile_data) + Header->NamesSize + + Header->NumCounters * __llvm_profile_counter_entry_size() + + Header->NumBitmapBytes) return 1; for (SrcData = SrcDataStart, @@ -83,7 +88,8 @@ int __llvm_profile_check_compatibility(const char *ProfileData, SrcData < SrcDataEnd; ++SrcData, ++DstData) { if (SrcData->NameRef != DstData->NameRef || SrcData->FuncHash != DstData->FuncHash || - SrcData->NumCounters != DstData->NumCounters) + SrcData->NumCounters != DstData->NumCounters || + SrcData->NumBitmapBytes != DstData->NumBitmapBytes) return 1; } @@ -99,6 +105,26 @@ static uintptr_t signextIfWin64(void *V) { #endif } +// Skip names section, vtable profile data section and vtable names section +// for runtime profile merge. To merge runtime addresses from multiple +// profiles collected from the same instrumented binary, the binary should be +// loaded at fixed base address (e.g., build with -no-pie, or run with ASLR +// disabled). 
In this set-up these three sections remain unchanged. +static uint64_t +getDistanceFromCounterToValueProf(const __llvm_profile_header *const Header) { + const uint64_t VTableSectionSize = + Header->NumVTables * sizeof(VTableProfData); + const uint64_t PaddingBytesAfterVTableSection = + __llvm_profile_get_num_padding_bytes(VTableSectionSize); + const uint64_t VNamesSize = Header->VNamesSize; + const uint64_t PaddingBytesAfterVNamesSize = + __llvm_profile_get_num_padding_bytes(VNamesSize); + return Header->NamesSize + + __llvm_profile_get_num_padding_bytes(Header->NamesSize) + + VTableSectionSize + PaddingBytesAfterVTableSection + VNamesSize + + PaddingBytesAfterVNamesSize; +} + COMPILER_RT_VISIBILITY int __llvm_profile_merge_from_buffer(const char *ProfileData, uint64_t ProfileSize) { @@ -112,24 +138,46 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData, __llvm_profile_data *SrcDataStart, *SrcDataEnd, *SrcData, *DstData; __llvm_profile_header *Header = (__llvm_profile_header *)ProfileData; - char *SrcCountersStart; + + char *SrcCountersStart, *DstCounter; + const char *SrcCountersEnd, *SrcCounter; + const char *SrcBitmapStart; const char *SrcNameStart; const char *SrcValueProfDataStart, *SrcValueProfData; uintptr_t CountersDelta = Header->CountersDelta; + uintptr_t BitmapDelta = Header->BitmapDelta; SrcDataStart = (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header) + Header->BinaryIdsSize); - SrcDataEnd = SrcDataStart + Header->DataSize; + SrcDataEnd = SrcDataStart + Header->NumData; SrcCountersStart = (char *)SrcDataEnd; - SrcNameStart = SrcCountersStart + - Header->CountersSize * __llvm_profile_counter_entry_size(); + SrcCountersEnd = SrcCountersStart + + Header->NumCounters * __llvm_profile_counter_entry_size(); + SrcBitmapStart = SrcCountersEnd; + SrcNameStart = SrcBitmapStart + Header->NumBitmapBytes; SrcValueProfDataStart = - SrcNameStart + Header->NamesSize + - __llvm_profile_get_num_padding_bytes(Header->NamesSize); - if 
(SrcNameStart < SrcCountersStart) + SrcNameStart + getDistanceFromCounterToValueProf(Header); + if (SrcNameStart < SrcCountersStart || SrcNameStart < SrcBitmapStart) return 1; + // Merge counters by iterating the entire counter section when correlation is + // enabled. + if (hasCorrelation()) { + for (SrcCounter = SrcCountersStart, + DstCounter = __llvm_profile_begin_counters(); + SrcCounter < SrcCountersEnd;) { + if (__llvm_profile_get_version() & VARIANT_MASK_BYTE_COVERAGE) { + *DstCounter &= *SrcCounter; + } else { + *(uint64_t *)DstCounter += *(uint64_t *)SrcCounter; + } + SrcCounter += __llvm_profile_counter_entry_size(); + DstCounter += __llvm_profile_counter_entry_size(); + } + return 0; + } + for (SrcData = SrcDataStart, DstData = (__llvm_profile_data *)__llvm_profile_begin_data(), SrcValueProfData = SrcValueProfDataStart; @@ -140,6 +188,8 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData, // extend CounterPtr to get the original value. char *DstCounters = (char *)((uintptr_t)DstData + signextIfWin64(DstData->CounterPtr)); + char *DstBitmap = + (char *)((uintptr_t)DstData + signextIfWin64(DstData->BitmapPtr)); unsigned NVK = 0; // SrcData is a serialized representation of the memory image. We need to @@ -169,6 +219,21 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData, } } + const char *SrcBitmap = + SrcBitmapStart + ((uintptr_t)SrcData->BitmapPtr - BitmapDelta); + // BitmapDelta also needs to be decreased as we advance to the next data + // record. + BitmapDelta -= sizeof(*SrcData); + unsigned NB = SrcData->NumBitmapBytes; + // NumBitmapBytes may legitimately be 0. Just keep going. + if (NB != 0) { + if (SrcBitmap < SrcBitmapStart || (SrcBitmap + NB) > SrcNameStart) + return 1; + // Merge Src and Dst Bitmap bytes by simply ORing them together. + for (unsigned I = 0; I < NB; I++) + DstBitmap[I] |= SrcBitmap[I]; + } + /* Now merge value profile data. 
*/ if (!VPMergeHook) continue; diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c b/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c index 63219da18ae3a940690588353b073e1588e0fff3..b9d51b698b414fc820ba512300ca52417dc74554 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c @@ -175,7 +175,8 @@ void __llvm_profile_register_names_function(void *NamesStart, uint64_t NamesSize) {} // The __start_SECNAME and __stop_SECNAME symbols (for SECNAME \in -// {"__llvm_prf_cnts", "__llvm_prf_data", "__llvm_prf_name", "__llvm_prf_vnds"}) +// {"__llvm_prf_cnts", "__llvm_prf_data", "__llvm_prf_name", "__llvm_prf_vnds", +// "__llvm_prf_vns", "__llvm_prf_vtab"}) // are always live when linking on AIX, regardless if the .o's being linked // reference symbols from the profile library (for example when no files were // compiled with -fprofile-generate). That's because these symbols are kept @@ -187,12 +188,20 @@ void __llvm_profile_register_names_function(void *NamesStart, // define these zero length variables in each of the above 4 sections. 
static int dummy_cnts[0] COMPILER_RT_SECTION( COMPILER_RT_SEG INSTR_PROF_CNTS_SECT_NAME); +static int dummy_bits[0] COMPILER_RT_SECTION( + COMPILER_RT_SEG INSTR_PROF_BITS_SECT_NAME); static int dummy_data[0] COMPILER_RT_SECTION( COMPILER_RT_SEG INSTR_PROF_DATA_SECT_NAME); static const int dummy_name[0] COMPILER_RT_SECTION( COMPILER_RT_SEG INSTR_PROF_NAME_SECT_NAME); static int dummy_vnds[0] COMPILER_RT_SECTION( COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME); +static int dummy_orderfile[0] COMPILER_RT_SECTION( + COMPILER_RT_SEG INSTR_PROF_ORDERFILE_SECT_NAME); +static int dummy_vname[0] COMPILER_RT_SECTION( + COMPILER_RT_SEG INSTR_PROF_VNAME_SECT_NAME); +static int dummy_vtab[0] COMPILER_RT_SECTION( + COMPILER_RT_SEG INSTR_PROF_VTAB_SECT_NAME); // To avoid GC'ing of the dummy variables by the linker, reference them in an // array and reference the array in the runtime registration code @@ -202,8 +211,10 @@ static int dummy_vnds[0] COMPILER_RT_SECTION( #pragma GCC diagnostic ignored "-Wcast-qual" #endif COMPILER_RT_VISIBILITY -void *__llvm_profile_keep[] = {(void *)&dummy_cnts, (void *)&dummy_data, - (void *)&dummy_name, (void *)&dummy_vnds}; +void *__llvm_profile_keep[] = {(void *)&dummy_cnts, (void *)&dummy_bits, + (void *)&dummy_data, (void *)&dummy_name, + (void *)&dummy_vnds, (void *)&dummy_orderfile, + (void *)&dummy_vname, (void *)&dummy_vtab}; #ifdef __GNUC__ #pragma GCC diagnostic pop #endif diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformDarwin.c b/compiler-rt/lib/profile/InstrProfilingPlatformDarwin.c index d9f2a113f5b020c2e91fb65cdaf64a5bc3a361a6..6adc7f328cbf7bd8bc64054a9ce36480f1e95aa7 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformDarwin.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformDarwin.c @@ -31,6 +31,22 @@ extern char COMPILER_RT_VISIBILITY extern char CountersEnd __asm("section$end$__DATA$" INSTR_PROF_CNTS_SECT_NAME); COMPILER_RT_VISIBILITY +extern char + BitmapStart __asm("section$start$__DATA$" 
INSTR_PROF_BITS_SECT_NAME); +COMPILER_RT_VISIBILITY +extern char BitmapEnd __asm("section$end$__DATA$" INSTR_PROF_BITS_SECT_NAME); +COMPILER_RT_VISIBILITY +extern VTableProfData + VTableProfStart __asm("section$start$__DATA$" INSTR_PROF_VTAB_SECT_NAME); +COMPILER_RT_VISIBILITY +extern VTableProfData + VTableProfEnd __asm("section$end$__DATA$" INSTR_PROF_VTAB_SECT_NAME); +COMPILER_RT_VISIBILITY +extern char + VNameStart __asm("section$start$__DATA$" INSTR_PROF_VNAME_SECT_NAME); +COMPILER_RT_VISIBILITY +extern char VNameEnd __asm("section$end$__DATA$" INSTR_PROF_VNAME_SECT_NAME); +COMPILER_RT_VISIBILITY extern uint32_t OrderFileStart __asm("section$start$__DATA$" INSTR_PROF_ORDERFILE_SECT_NAME); @@ -56,6 +72,22 @@ char *__llvm_profile_begin_counters(void) { return &CountersStart; } COMPILER_RT_VISIBILITY char *__llvm_profile_end_counters(void) { return &CountersEnd; } COMPILER_RT_VISIBILITY +char *__llvm_profile_begin_bitmap(void) { return &BitmapStart; } +COMPILER_RT_VISIBILITY +char *__llvm_profile_end_bitmap(void) { return &BitmapEnd; } +COMPILER_RT_VISIBILITY +const VTableProfData *__llvm_profile_begin_vtables(void) { + return &VTableProfStart; +} +COMPILER_RT_VISIBILITY +const VTableProfData *__llvm_profile_end_vtables(void) { + return &VTableProfEnd; +} +COMPILER_RT_VISIBILITY +const char *__llvm_profile_begin_vtabnames(void) { return &VNameStart; } +COMPILER_RT_VISIBILITY +const char *__llvm_profile_end_vtabnames(void) { return &VNameEnd; } +COMPILER_RT_VISIBILITY uint32_t *__llvm_profile_begin_orderfile(void) { return &OrderFileStart; } COMPILER_RT_VISIBILITY diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c index 2cce0a4b2c48d35ce51e665c26498ba2d0638ec9..761f21d6dbb8b05340696cd699d72ae70a927503 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c @@ -33,8 +33,14 @@ #define PROF_DATA_STOP 
INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_COMMON) #define PROF_NAME_START INSTR_PROF_SECT_START(INSTR_PROF_NAME_COMMON) #define PROF_NAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_NAME_COMMON) +#define PROF_VNAME_START INSTR_PROF_SECT_START(INSTR_PROF_VNAME_COMMON) +#define PROF_VNAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VNAME_COMMON) #define PROF_CNTS_START INSTR_PROF_SECT_START(INSTR_PROF_CNTS_COMMON) #define PROF_CNTS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_CNTS_COMMON) +#define PROF_VTABLE_START INSTR_PROF_SECT_START(INSTR_PROF_VTAB_COMMON) +#define PROF_VTABLE_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VTAB_COMMON) +#define PROF_BITS_START INSTR_PROF_SECT_START(INSTR_PROF_BITS_COMMON) +#define PROF_BITS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_BITS_COMMON) #define PROF_ORDERFILE_START INSTR_PROF_SECT_START(INSTR_PROF_ORDERFILE_COMMON) #define PROF_VNODES_START INSTR_PROF_SECT_START(INSTR_PROF_VNODES_COMMON) #define PROF_VNODES_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VNODES_COMMON) @@ -48,6 +54,12 @@ extern __llvm_profile_data PROF_DATA_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern char PROF_CNTS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern char PROF_CNTS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; +extern VTableProfData PROF_VTABLE_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; +extern VTableProfData PROF_VTABLE_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; +extern char PROF_VNAME_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; +extern char PROF_VNAME_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; +extern char PROF_BITS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; +extern char PROF_BITS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern uint32_t PROF_ORDERFILE_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern char PROF_NAME_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern char PROF_NAME_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; @@ -68,12 +80,31 @@ COMPILER_RT_VISIBILITY const char *__llvm_profile_begin_names(void) { COMPILER_RT_VISIBILITY const char 
*__llvm_profile_end_names(void) { return &PROF_NAME_STOP; } +COMPILER_RT_VISIBILITY const char *__llvm_profile_begin_vtabnames(void) { + return &PROF_VNAME_START; +} +COMPILER_RT_VISIBILITY const char *__llvm_profile_end_vtabnames(void) { + return &PROF_VNAME_STOP; +} +COMPILER_RT_VISIBILITY const VTableProfData * +__llvm_profile_begin_vtables(void) { + return &PROF_VTABLE_START; +} +COMPILER_RT_VISIBILITY const VTableProfData *__llvm_profile_end_vtables(void) { + return &PROF_VTABLE_STOP; +} COMPILER_RT_VISIBILITY char *__llvm_profile_begin_counters(void) { return &PROF_CNTS_START; } COMPILER_RT_VISIBILITY char *__llvm_profile_end_counters(void) { return &PROF_CNTS_STOP; } +COMPILER_RT_VISIBILITY char *__llvm_profile_begin_bitmap(void) { + return &PROF_BITS_START; +} +COMPILER_RT_VISIBILITY char *__llvm_profile_end_bitmap(void) { + return &PROF_BITS_STOP; +} COMPILER_RT_VISIBILITY uint32_t *__llvm_profile_begin_orderfile(void) { return &PROF_ORDERFILE_START; } diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformOther.c b/compiler-rt/lib/profile/InstrProfilingPlatformOther.c index c7b6e842c9fac27b2885242d6c2cd48797bfe799..aa79a5641ceca6192946935af5d82f2156b54aa1 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformOther.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformOther.c @@ -18,8 +18,12 @@ static const __llvm_profile_data *DataFirst = NULL; static const __llvm_profile_data *DataLast = NULL; +static const VTableProfData *VTableProfDataFirst = NULL; +static const VTableProfData *VTableProfDataLast = NULL; static const char *NamesFirst = NULL; static const char *NamesLast = NULL; +static const char *VNamesFirst = NULL; +static const char *VNamesLast = NULL; static char *CountersFirst = NULL; static char *CountersLast = NULL; static uint32_t *OrderFileFirst = NULL; @@ -80,14 +84,29 @@ COMPILER_RT_VISIBILITY const __llvm_profile_data *__llvm_profile_begin_data(void) { return DataFirst; } COMPILER_RT_VISIBILITY const __llvm_profile_data 
*__llvm_profile_end_data(void) { return DataLast; } +COMPILER_RT_VISIBILITY const VTableProfData * +__llvm_profile_begin_vtables(void) { + return VTableProfDataFirst; +} +COMPILER_RT_VISIBILITY const VTableProfData *__llvm_profile_end_vtables(void) { + return VTableProfDataLast; +} COMPILER_RT_VISIBILITY const char *__llvm_profile_begin_names(void) { return NamesFirst; } COMPILER_RT_VISIBILITY const char *__llvm_profile_end_names(void) { return NamesLast; } COMPILER_RT_VISIBILITY +const char *__llvm_profile_begin_vtabnames(void) { return VNamesFirst; } +COMPILER_RT_VISIBILITY +const char *__llvm_profile_end_vtabnames(void) { return VNamesLast; } +COMPILER_RT_VISIBILITY char *__llvm_profile_begin_counters(void) { return CountersFirst; } COMPILER_RT_VISIBILITY char *__llvm_profile_end_counters(void) { return CountersLast; } +COMPILER_RT_VISIBILITY +char *__llvm_profile_begin_bitmap(void) { return BitmapFirst; } +COMPILER_RT_VISIBILITY +char *__llvm_profile_end_bitmap(void) { return BitmapLast; } /* TODO: correctly set up OrderFileFirst. */ COMPILER_RT_VISIBILITY uint32_t *__llvm_profile_begin_orderfile(void) { return OrderFileFirst; } diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c b/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c index dd576b2f8357dbb18efc7b22df01fa2cb3d6603b..3b9b44eb6a9846de5c92770a2ec7a27c0da32c2c 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c @@ -6,6 +6,8 @@ |* \*===----------------------------------------------------------------------===*/ +#include + #include "InstrProfiling.h" #include "InstrProfilingInternal.h" @@ -13,12 +15,14 @@ #if defined(_MSC_VER) /* Merge read-write sections into .data. 
*/ -#pragma comment(linker, "/MERGE:.lprfc=.data") +#pragma comment(linker, "/MERGE:.lprfb=.data") #pragma comment(linker, "/MERGE:.lprfd=.data") #pragma comment(linker, "/MERGE:.lprfv=.data") #pragma comment(linker, "/MERGE:.lprfnd=.data") /* Do *NOT* merge .lprfn and .lcovmap into .rdata. llvm-cov must be able to find * after the fact. + * Do *NOT* merge .lprfc into .rdata. When binary profile correlation is enabled, + * llvm-cov must be able to find it after the fact. */ /* Allocate read-only section bounds. */ @@ -30,6 +34,8 @@ #pragma section(".lprfd$Z", read, write) #pragma section(".lprfc$A", read, write) #pragma section(".lprfc$Z", read, write) +#pragma section(".lprfb$A", read, write) +#pragma section(".lprfb$Z", read, write) #pragma section(".lorderfile$A", read, write) #pragma section(".lprfnd$A", read, write) #pragma section(".lprfnd$Z", read, write) @@ -43,6 +49,8 @@ const char COMPILER_RT_SECTION(".lprfn$Z") NamesEnd = '\0'; char COMPILER_RT_SECTION(".lprfc$A") CountersStart; char COMPILER_RT_SECTION(".lprfc$Z") CountersEnd; +char COMPILER_RT_SECTION(".lprfb$A") BitmapStart; +char COMPILER_RT_SECTION(".lprfb$Z") BitmapEnd; uint32_t COMPILER_RT_SECTION(".lorderfile$A") OrderFileStart; ValueProfNode COMPILER_RT_SECTION(".lprfnd$A") VNodesStart; @@ -53,11 +61,30 @@ const __llvm_profile_data *__llvm_profile_begin_data(void) { } const __llvm_profile_data *__llvm_profile_end_data(void) { return &DataEnd; } +// Type profiling isn't implemented under MSVC ABI, so return NULL (rather than +// implementing linker magic on Windows) to make it more explicit. To elaborate, +// the current type profiling implementation maps a profiled vtable address to a +// vtable variable through the vtable's mangled name. Under MSVC ABI, the variable +// name for vtables might not be the mangled name (see +// MicrosoftCXXABI::getAddrOfVTable in MicrosoftCXXABI.cpp for more details on +// how a vtable name is computed).
Note the mangled name is still in the vtable +// IR (just not variable name) for mapping purpose, but more implementation work +// is required. +const VTableProfData *__llvm_profile_begin_vtables(void) { return NULL; } +const VTableProfData *__llvm_profile_end_vtables(void) { return NULL; } + const char *__llvm_profile_begin_names(void) { return &NamesStart + 1; } const char *__llvm_profile_end_names(void) { return &NamesEnd; } +// Type profiling isn't supported on Windows, so return NULL to make it more +// explicit. +const char *__llvm_profile_begin_vtabnames(void) { return NULL; } +const char *__llvm_profile_end_vtabnames(void) { return NULL; } + char *__llvm_profile_begin_counters(void) { return &CountersStart + 1; } char *__llvm_profile_end_counters(void) { return &CountersEnd; } +char *__llvm_profile_begin_bitmap(void) { return &BitmapStart + 1; } +char *__llvm_profile_end_bitmap(void) { return &BitmapEnd; } uint32_t *__llvm_profile_begin_orderfile(void) { return &OrderFileStart; } ValueProfNode *__llvm_profile_begin_vnodes(void) { return &VNodesStart + 1; } diff --git a/compiler-rt/lib/profile/InstrProfilingWriter.c b/compiler-rt/lib/profile/InstrProfilingWriter.c index 4a392984fe6bac2caa880e18c96ec7c76264b331..8816a71155511b4d9d45a602eb5b2d4d530a74be 100644 --- a/compiler-rt/lib/profile/InstrProfilingWriter.c +++ b/compiler-rt/lib/profile/InstrProfilingWriter.c @@ -246,32 +246,46 @@ COMPILER_RT_VISIBILITY int lprofWriteData(ProfDataWriter *Writer, const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); const char *CountersBegin = __llvm_profile_begin_counters(); const char *CountersEnd = __llvm_profile_end_counters(); + const char *BitmapBegin = __llvm_profile_begin_bitmap(); + const char *BitmapEnd = __llvm_profile_end_bitmap(); const char *NamesBegin = __llvm_profile_begin_names(); const char *NamesEnd = __llvm_profile_end_names(); + const VTableProfData *VTableBegin = __llvm_profile_begin_vtables(); + const VTableProfData *VTableEnd =
__llvm_profile_end_vtables(); + const char *VNamesBegin = __llvm_profile_begin_vtabnames(); + const char *VNamesEnd = __llvm_profile_end_vtabnames(); return lprofWriteDataImpl(Writer, DataBegin, DataEnd, CountersBegin, - CountersEnd, VPDataReader, NamesBegin, NamesEnd, - SkipNameDataWrite); + CountersEnd, BitmapBegin, BitmapEnd, VPDataReader, + NamesBegin, NamesEnd, VTableBegin, VTableEnd, + VNamesBegin, VNamesEnd, SkipNameDataWrite); } COMPILER_RT_VISIBILITY int lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd, const char *CountersBegin, const char *CountersEnd, + const char *BitmapBegin, const char *BitmapEnd, VPDataReaderType *VPDataReader, const char *NamesBegin, - const char *NamesEnd, int SkipNameDataWrite) { - int DebugInfoCorrelate = - (__llvm_profile_get_version() & VARIANT_MASK_DBG_CORRELATE) != 0ULL; - + const char *NamesEnd, const VTableProfData *VTableBegin, + const VTableProfData *VTableEnd, const char *VNamesBegin, + const char *VNamesEnd, int SkipNameDataWrite) { /* Calculate size of sections. */ const uint64_t DataSectionSize = - DebugInfoCorrelate ? 0 : __llvm_profile_get_data_size(DataBegin, DataEnd); - const uint64_t NumData = - DebugInfoCorrelate ? 0 : __llvm_profile_get_num_data(DataBegin, DataEnd); + __llvm_profile_get_data_size(DataBegin, DataEnd); + const uint64_t NumData = __llvm_profile_get_num_data(DataBegin, DataEnd); const uint64_t CountersSectionSize = __llvm_profile_get_counters_size(CountersBegin, CountersEnd); const uint64_t NumCounters = __llvm_profile_get_num_counters(CountersBegin, CountersEnd); - const uint64_t NamesSize = DebugInfoCorrelate ? 
0 : NamesEnd - NamesBegin; + const uint64_t NumBitmapBytes = + __llvm_profile_get_num_bitmap_bytes(BitmapBegin, BitmapEnd); + const uint64_t NamesSize = __llvm_profile_get_name_size(NamesBegin, NamesEnd); + const uint64_t NumVTables = + __llvm_profile_get_num_vtable(VTableBegin, VTableEnd); + const uint64_t VTableSectionSize = + __llvm_profile_get_vtable_section_size(VTableBegin, VTableEnd); + const uint64_t VNamesSize = + __llvm_profile_get_name_size(VNamesBegin, VNamesEnd); /* Create the header. */ __llvm_profile_header Header; @@ -279,18 +293,17 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin, /* Determine how much padding is needed before/after the counters and after * the names. */ uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters, - PaddingBytesAfterNames; - __llvm_profile_get_padding_sizes_for_counters( - DataSectionSize, CountersSectionSize, NamesSize, - &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters, - &PaddingBytesAfterNames); + PaddingBytesAfterBitmapBytes, PaddingBytesAfterNames, + PaddingBytesAfterVTable, PaddingBytesAfterVNames; + if (__llvm_profile_get_padding_sizes_for_counters( + DataSectionSize, CountersSectionSize, NumBitmapBytes, NamesSize, + VTableSectionSize, VNamesSize, &PaddingBytesBeforeCounters, + &PaddingBytesAfterCounters, &PaddingBytesAfterBitmapBytes, + &PaddingBytesAfterNames, &PaddingBytesAfterVTable, + &PaddingBytesAfterVNames) == -1) + return -1; { - // TODO: Unfortunately the header's fields are named DataSize and - // CountersSize when they should be named NumData and NumCounters, - // respectively. - const uint64_t CountersSize = NumCounters; - const uint64_t DataSize = NumData; /* Initialize header structure. */ #define INSTR_PROF_RAW_HEADER(Type, Name, Init) Header.Name = Init; #include "profile/InstrProfData.inc" @@ -300,10 +313,11 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin, * CountersDelta to match. 
*/ #ifdef _WIN64 Header.CountersDelta = (uint32_t)Header.CountersDelta; + Header.BitmapDelta = (uint32_t)Header.BitmapDelta; #endif /* The data and names sections are omitted in lightweight mode. */ - if (DebugInfoCorrelate) { + if (NumData == 0 && NamesSize == 0) { Header.CountersDelta = 0; Header.NamesDelta = 0; } @@ -319,19 +333,25 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin, /* Write the profile data. */ ProfDataIOVec IOVecData[] = { - {DebugInfoCorrelate ? NULL : DataBegin, sizeof(uint8_t), DataSectionSize, - 0}, + {DataBegin, sizeof(uint8_t), DataSectionSize, 0}, {NULL, sizeof(uint8_t), PaddingBytesBeforeCounters, 1}, {CountersBegin, sizeof(uint8_t), CountersSectionSize, 0}, {NULL, sizeof(uint8_t), PaddingBytesAfterCounters, 1}, - {(SkipNameDataWrite || DebugInfoCorrelate) ? NULL : NamesBegin, - sizeof(uint8_t), NamesSize, 0}, - {NULL, sizeof(uint8_t), PaddingBytesAfterNames, 1}}; + {BitmapBegin, sizeof(uint8_t), NumBitmapBytes, 0}, + {NULL, sizeof(uint8_t), PaddingBytesAfterBitmapBytes, 1}, + {SkipNameDataWrite ? NULL : NamesBegin, sizeof(uint8_t), NamesSize, 0}, + {NULL, sizeof(uint8_t), PaddingBytesAfterNames, 1}, + {VTableBegin, sizeof(uint8_t), VTableSectionSize, 0}, + {NULL, sizeof(uint8_t), PaddingBytesAfterVTable, 1}, + {SkipNameDataWrite ? NULL : VNamesBegin, sizeof(uint8_t), VNamesSize, 0}, + {NULL, sizeof(uint8_t), PaddingBytesAfterVNames, 1}}; if (Writer->Write(Writer, IOVecData, sizeof(IOVecData) / sizeof(*IOVecData))) return -1; - /* Value profiling is not yet supported in continuous mode. */ - if (__llvm_profile_is_continuous_mode_enabled()) + /* Value profiling is not yet supported in continuous mode and profile + * correlation mode. 
*/ + if (__llvm_profile_is_continuous_mode_enabled() || + (NumData == 0 && NamesSize == 0)) return 0; return writeValueProfData(Writer, VPDataReader, DataBegin, DataEnd); diff --git a/compiler-rt/test/CMakeLists.txt b/compiler-rt/test/CMakeLists.txt index bc37c85a140adcd111625f89f39948d0026e5a54..c6a4c61ff48765ad4893f3b996e6d92e8c46a07f 100644 --- a/compiler-rt/test/CMakeLists.txt +++ b/compiler-rt/test/CMakeLists.txt @@ -34,8 +34,9 @@ if(NOT ANDROID) if(NOT COMPILER_RT_STANDALONE_BUILD AND NOT LLVM_RUNTIMES_BUILD) # Use LLVM utils and Clang from the same build tree. list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS - clang clang-resource-headers FileCheck count not llvm-config llvm-nm llvm-objdump - llvm-readelf llvm-readobj llvm-size llvm-symbolizer compiler-rt-headers sancov split-file) + clang clang-resource-headers FileCheck count not llvm-config llvm-nm + llvm-objdump llvm-readelf llvm-readobj llvm-size llvm-symbolizer + compiler-rt-headers sancov split-file llvm-strip) if (WIN32) list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS KillTheDoctor) endif() diff --git a/compiler-rt/test/profile/Linux/binary-id.c b/compiler-rt/test/profile/Linux/binary-id.c index 61b8ed9268496a354930c3c77e45e75c33e753f5..9bff9004e97a7eaf35f4571d90c91f38695ba7cb 100644 --- a/compiler-rt/test/profile/Linux/binary-id.c +++ b/compiler-rt/test/profile/Linux/binary-id.c @@ -17,16 +17,35 @@ // RUN: llvm-profdata show --binary-ids %t.profdir/default_*.profraw > %t.profraw.out // RUN: FileCheck %s --check-prefix=BINARY-ID-MERGE-PROF < %t.profraw.out -// RUN: llvm-profdata merge -o %t.profdata %t.profraw %t.profraw +// RUN: llvm-profdata merge -o %t.profdata %t.profdir/default_*.profraw // RUN: llvm-profdata show --binary-ids %t.profdata > %t.profdata.out -// RUN: FileCheck %s --check-prefix=BINARY-ID-INDEXED-PROF < %t.profraw.out +// RUN: FileCheck %s --check-prefix=BINARY-ID-INDEXED-PROF < %t.profdata.out +// Test raw profiles with shared libraries. 
+// RUN: split-file %s %t.dir +// RUN: %clang_profgen -Wl,--build-id -fpic -shared -O2 %t.dir/foo.c -o %t.dir/libfoo.so +// RUN: %clang_profgen -Wl,--build-id -fpic -shared -O2 %t.dir/bar.c -o %t.dir/libbar.so +// RUN: %clang_profgen -Wl,--build-id -O2 %t.dir/main.c %t.dir/libfoo.so %t.dir/libbar.so -o %t +// RUN: env LLVM_PROFILE_FILE=%t.profraw LD_LIBRARY_PATH=%t.dir %run %t +// RUN: llvm-profdata show --binary-ids %t.profraw > %t.profraw.out +// RUN: llvm-profdata merge -o %t.profdata %t.profraw +// RUN: FileCheck %s --check-prefix=BINARY-ID-SHARE-RAW-PROF < %t.profraw.out + +// RUN: llvm-profdata merge -o %t.profdata %t.profraw +// RUN: llvm-profdata show --binary-ids %t.profdata > %t.profdata.out +// RUN: FileCheck %s --check-prefix=BINARY-ID-SHARE-INDEXED-PROF < %t.profdata.out + +//--- foo.c void foo() { } +//--- bar.c void bar() { } +//--- main.c +void foo(); +void bar(); int main() { foo(); bar(); @@ -59,3 +78,21 @@ int main() { // BINARY-ID-INDEXED-PROF-NEXT: Maximum internal block count: 0 // BINARY-ID-INDEXED-PROF-NEXT: Binary IDs: // BINARY-ID-INDEXED-PROF-NEXT: {{[0-9a-f]+}} + +// BINARY-ID-SHARE-RAW-PROF: Instrumentation level: Front-end +// BINARY-ID-SHARE-RAW-PROF-NEXT: Total functions: 3 +// BINARY-ID-SHARE-RAW-PROF-NEXT: Maximum function count: 1 +// BINARY-ID-SHARE-RAW-PROF-NEXT: Maximum internal block count: 0 +// BINARY-ID-SHARE-RAW-PROF-NEXT: Binary IDs: +// BINARY-ID-SHARE-RAW-PROF-NEXT: {{[0-9a-f]+}} +// BINARY-ID-SHARE-RAW-PROF-NEXT: {{[0-9a-f]+}} +// BINARY-ID-SHARE-RAW-PROF-NEXT: {{[0-9a-f]+}} + +// BINARY-ID-SHARE-INDEXED-PROF: Instrumentation level: Front-end +// BINARY-ID-SHARE-INDEXED-PROF-NEXT: Total functions: 3 +// BINARY-ID-SHARE-INDEXED-PROF-NEXT: Maximum function count: 1 +// BINARY-ID-SHARE-INDEXED-PROF-NEXT: Maximum internal block count: 0 +// BINARY-ID-SHARE-INDEXED-PROF-NEXT: Binary IDs: +// BINARY-ID-SHARE-INDEXED-PROF-NEXT: {{[0-9a-f]+}} +// BINARY-ID-SHARE-INDEXED-PROF-NEXT: {{[0-9a-f]+}} +// 
BINARY-ID-SHARE-INDEXED-PROF-NEXT: {{[0-9a-f]+}} diff --git a/compiler-rt/test/profile/Linux/instrprof-show-debug-info-correlation.c b/compiler-rt/test/profile/Linux/instrprof-show-debug-info-correlation.c index 226d678aca347a430cec1009d34a96ab42a96b99..245dc798910425bbf8e323d1d63d601ed8ef0526 100644 --- a/compiler-rt/test/profile/Linux/instrprof-show-debug-info-correlation.c +++ b/compiler-rt/test/profile/Linux/instrprof-show-debug-info-correlation.c @@ -4,7 +4,7 @@ // RUN: %clang_pgogen -o %t.no.dbg -mllvm --debug-info-correlate -mllvm --disable-vp=true %s // RUN: not llvm-profdata show --debug-info=%t.no.dbg 2>&1 | FileCheck %s --check-prefix NO-DBG -// NO-DBG: unable to correlate profile: could not find any profile metadata in debug info +// NO-DBG: unable to correlate profile: could not find any profile data metadata in correlated file // YAML: Probes: // YAML: - Function Name: a diff --git a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp new file mode 100644 index 0000000000000000000000000000000000000000..46a925ce0f9fe34f3d5b749263da46747b35c028 --- /dev/null +++ b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp @@ -0,0 +1,230 @@ +// REQUIRES: lld-available + +// Building the instrumented binary will fail because lld doesn't support +// big-endian ELF for PPC (aka ABI 1). +// ld.lld: error: /lib/../lib64/Scrt1.o: ABI version 1 is not supported +// UNSUPPORTED: ppc && host-byteorder-big-endian + +// RUN: rm -rf %t && mkdir %t && cd %t + +// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -fprofile-generate=. -mllvm -enable-vtable-value-profiling %s -o test +// RUN: env LLVM_PROFILE_FILE=test.profraw ./test + +// Show vtable profiles from raw profile. +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables test.profraw | FileCheck %s --check-prefixes=COMMON,RAW + +// Generate indexed profile from raw profile and show the data. 
+// RUN: llvm-profdata merge --keep-vtable-symbols test.profraw -o test.profdata +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables test.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED + +// Generate text profile from raw and indexed profiles respectively and show the data. +// RUN: llvm-profdata merge --keep-vtable-symbols --text test.profraw -o raw.proftext +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text raw.proftext | FileCheck %s --check-prefix=ICTEXT +// RUN: llvm-profdata merge --keep-vtable-symbols --text test.profdata -o indexed.proftext +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text indexed.proftext | FileCheck %s --check-prefix=ICTEXT + +// Generate indexed profile from text profiles and show the data +// RUN: llvm-profdata merge --keep-vtable-symbols --binary raw.proftext -o text.profraw +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables text.profraw | FileCheck %s --check-prefixes=COMMON,INDEXED +// RUN: llvm-profdata merge --keep-vtable-symbols --binary indexed.proftext -o text.profdata +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables text.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED + +// COMMON: Counters: +// COMMON-NEXT: main: +// COMMON-NEXT: Hash: 0x068617320ec408a0 +// COMMON-NEXT: Counters: 4 +// COMMON-NEXT: Indirect Call Site Count: 2 +// COMMON-NEXT: Number of instrumented vtables: 2 +// RAW: Indirect Target Results: +// RAW-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%) +// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%) +// RAW-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%) +// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%) +// RAW-NEXT: VTable Results: +// RAW-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%) +// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] 
(75.00%) +// RAW-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%) +// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +// INDEXED: Indirect Target Results: +// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%) +// INDEXED-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%) +// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%) +// INDEXED-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%) +// INDEXED-NEXT: VTable Results: +// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%) +// INDEXED-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%) +// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +// INDEXED-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%) +// COMMON: Instrumentation level: IR entry_first = 0 +// COMMON-NEXT: Functions shown: 1 +// COMMON-NEXT: Total functions: 7 +// COMMON-NEXT: Maximum function count: 1000 +// COMMON-NEXT: Maximum internal block count: 1000 +// COMMON-NEXT: Statistics for indirect call sites profile: +// COMMON-NEXT: Total number of sites: 2 +// COMMON-NEXT: Total number of sites with values: 2 +// COMMON-NEXT: Total number of profiled values: 4 +// COMMON-NEXT: Value sites histogram: +// COMMON-NEXT: NumTargets, SiteCount +// COMMON-NEXT: 2, 2 +// COMMON-NEXT: Statistics for vtable profile: +// COMMON-NEXT: Total number of sites: 2 +// COMMON-NEXT: Total number of sites with values: 2 +// COMMON-NEXT: Total number of profiled values: 4 +// COMMON-NEXT: Value sites histogram: +// COMMON-NEXT: NumTargets, SiteCount +// COMMON-NEXT: 2, 2 + +// ICTEXT: :ir +// ICTEXT: main +// ICTEXT: # Func Hash: +// ICTEXT: 470088714870327456 +// ICTEXT: # Num Counters: +// ICTEXT: 4 +// ICTEXT: # Counter Values: +// ICTEXT: 1000 +// ICTEXT: 1000 +// ICTEXT: 200 +// ICTEXT: 1 +// ICTEXT: # Num Value Kinds: +// ICTEXT: 2 +// 
ICTEXT: # ValueKind = IPVK_IndirectCallTarget: +// ICTEXT: 0 +// ICTEXT: # NumValueSites: +// ICTEXT: 2 +// ICTEXT: 2 +// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii:150 +// ICTEXT: _ZN8Derived14funcEii:50 +// ICTEXT: 2 +// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev:750 +// ICTEXT: _ZN8Derived1D0Ev:250 +// ICTEXT: # ValueKind = IPVK_VTableTarget: +// ICTEXT: 2 +// ICTEXT: # NumValueSites: +// ICTEXT: 2 +// ICTEXT: 2 +// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:150 +// ICTEXT: _ZTV8Derived1:50 +// ICTEXT: 2 +// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750 +// ICTEXT: _ZTV8Derived1:250 + +// When vtable value profiles exist, pgo-instr-use pass should annotate them +// even if `-enable-vtable-value-profiling` is not explicitly on. +// RUN: %clangxx -m64 -fprofile-use=test.profdata -fuse-ld=lld -O2 \ +// RUN: -mllvm -print-after=pgo-instr-use -mllvm -filter-print-funcs=main \ +// RUN: -mllvm -print-module-scope %s 2>&1 | FileCheck %s --check-prefix=ANNOTATE + +// ANNOTATE-NOT: Inconsistent number of value sites +// ANNOTATE: !{!"VP", i32 2 + +// When vtable value profiles exist, pgo-instr-use pass will not annotate them +// if `-icp-max-num-vtables` is set to zero. +// RUN: %clangxx -m64 -fprofile-use=test.profdata -fuse-ld=lld -O2 \ +// RUN: -mllvm -icp-max-num-vtables=0 -mllvm -print-after=pgo-instr-use \ +// RUN: -mllvm -filter-print-funcs=main -mllvm -print-module-scope %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=OMIT + +// OMIT: Inconsistent number of value sites +// OMIT-NOT: !{!"VP", i32 2 + +// Test indirect call promotion transformation using vtable profiles. +// - Build with `-g` to enable debug information. +// - In real world settings, ICP pass is disabled in prelink pipeline. In +// the postlink pipeline, ICP is enabled after whole-program-devirtualization +// pass. Do the same thing in this test. 
+// - Enable `-fwhole-program-vtables` generate type metadata and intrinsics. +// - Enable `-fno-split-lto-unit` and `-Wl,-lto-whole-program-visibility` to +// preserve type intrinsics for ICP pass. +// RUN: %clangxx -m64 -fprofile-use=test.profdata -Wl,--lto-whole-program-visibility \ +// RUN: -mllvm -disable-icp=true -Wl,-mllvm,-disable-icp=false -fuse-ld=lld \ +// RUN: -g -flto=thin -fwhole-program-vtables -fno-split-lto-unit -O2 \ +// RUN: -mllvm -enable-vtable-value-profiling -Wl,-mllvm,-enable-vtable-value-profiling \ +// RUN: -mllvm -enable-vtable-profile-use \ +// RUN: -Wl,-mllvm,-enable-vtable-profile-use -Rpass=pgo-icall-prom \ +// RUN: -Wl,-mllvm,-print-after=pgo-icall-prom \ +// RUN: -Wl,-mllvm,-filter-print-funcs=main %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=REMARK,IR --implicit-check-not="!VP" + +// For the indirect call site `ptr->func` +// REMARK: instrprof-vtable-value-prof.cpp:226:19: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii with count 150 out of 200, sink 1 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E} +// REMARK: instrprof-vtable-value-prof.cpp:226:19: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1} +// +// For the indirect call site `delete ptr` +// REMARK: instrprof-vtable-value-prof.cpp:228:5: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev with count 750 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E} +// REMARK: instrprof-vtable-value-prof.cpp:228:5: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, sink 2 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1} + +// The IR matchers for indirect callsite `ptr->func`. 
+// IR-LABEL: @main +// IR: [[OBJ:%.*]] = {{.*}}call {{.*}} @_Z10createTypei +// IR: [[VTABLE:%.*]] = load ptr, ptr [[OBJ]] +// IR: [[CMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTVN12_GLOBAL__N_18Derived2E, i32 16) +// IR: br i1 [[CMP1]], label %[[BB1:.*]], label %[[BB2:[a-zA-Z0-9_.]+]], +// +// IR: [[BB1]]: +// IR: [[RESBB1:%.*]] = {{.*}}call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii +// IR: br label %[[MERGE0:[a-zA-Z0-9_.]+]] +// +// IR: [[BB2]]: +// IR: [[CMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV8Derived1, i32 16) +// IR: br i1 [[CMP2]], label %[[BB3:.*]], label %[[BB4:[a-zA-Z0-9_.]+]], +// +// IR: [[BB3]]: +// IR: [[RESBB3:%.*]] = {{.*}}call {{.*}} @_ZN8Derived14funcEii +// IR: br label %[[MERGE1:[a-zA-Z0-9_.]+]], +// +// IR: [[BB4]]: +// IR: [[FUNCPTR:%.*]] = load ptr, ptr [[VTABLE]] +// IR: [[RESBB4:%.*]] = {{.*}}call {{.*}} [[FUNCPTR]] +// IR: br label %[[MERGE1]] +// +// IR: [[MERGE1]]: +// IR: [[RES1:%.*]] = phi i32 [ [[RESBB4]], %[[BB4]] ], [ [[RESBB3]], %[[BB3]] ] +// IR: br label %[[MERGE0]] +// +// IR: [[MERGE0]]: +// IR: [[RES2:%.*]] = phi i32 [ [[RES1]], %[[MERGE1]] ], [ [[RESBB1]], %[[BB1]] ] +#include +#include +class Base { +public: + virtual int func(int a, int b) = 0; + + virtual ~Base() {}; +}; +class Derived1 : public Base { +public: + int func(int a, int b) override { return a * b; } + + ~Derived1() {} +}; +namespace { +class Derived2 : public Base { +public: + int func(int a, int b) override { return a * (a - b); } + + ~Derived2() {} +}; +} // namespace +__attribute__((noinline)) Base *createType(int a) { + Base *base = nullptr; + if (a % 4 == 0) + base = new Derived1(); + else + base = new Derived2(); + return base; +} +int main(int argc, char **argv) { + int sum = 0; + for (int i = 0; i < 1000; i++) { + int a = rand(); + int b = rand(); + Base *ptr = createType(i); + if (i % 5 == 0) + sum += ptr->func(b, a); + + delete ptr; + } + printf("sum is %d\n", sum); + return 0; +} diff 
--git a/compiler-rt/test/profile/instrprof-write-buffer-internal.c b/compiler-rt/test/profile/instrprof-write-buffer-internal.c index 7b96c6d91c33f5aaecb91ce30344ddd10878da90..2c1c29ac0c588a8ce38e4b518ade393e18c10756 100644 --- a/compiler-rt/test/profile/instrprof-write-buffer-internal.c +++ b/compiler-rt/test/profile/instrprof-write-buffer-internal.c @@ -25,17 +25,19 @@ const char *__llvm_profile_begin_names(void); const char *__llvm_profile_end_names(void); char *__llvm_profile_begin_counters(void); char *__llvm_profile_end_counters(void); +char *__llvm_profile_begin_bitmap(void); +char *__llvm_profile_end_bitmap(void); uint64_t __llvm_profile_get_size_for_buffer_internal( const void *DataBegin, const void *DataEnd, const char *CountersBegin, - const char *CountersEnd, const char *NamesBegin, const char *NamesEnd); + const char *CountersEnd, const char *BitmapBegin, const char *BitmapEnd, + const char *NamesBegin, const char *NamesEnd, const void *VTableBegin, + const void *VTableEnd, const char *VNamesBegin, const char *VNamesEnd); -int __llvm_profile_write_buffer_internal(char *Buffer, const void *DataBegin, - const void *DataEnd, - const char *CountersBegin, - const char *CountersEnd, - const char *NamesBegin, - const char *NamesEnd); +int __llvm_profile_write_buffer_internal( + char *Buffer, const void *DataBegin, const void *DataEnd, + const char *CountersBegin, const char *CountersEnd, const char *BitmapBegin, + const char *BitmapEnd, const char *NamesBegin, const char *NamesEnd); void __llvm_profile_set_dumped(void); @@ -43,12 +45,15 @@ int main(int argc, const char *argv[]) { uint64_t bufsize = __llvm_profile_get_size_for_buffer_internal( __llvm_profile_begin_data(), __llvm_profile_end_data(), __llvm_profile_begin_counters(), __llvm_profile_end_counters(), - __llvm_profile_begin_names(), __llvm_profile_end_names()); + __llvm_profile_begin_bitmap(), __llvm_profile_end_bitmap(), + __llvm_profile_begin_names(), __llvm_profile_end_names(), NULL, NULL, + NULL, 
NULL); char *buf = malloc(bufsize); - int ret = __llvm_profile_write_buffer_internal(buf, - __llvm_profile_begin_data(), __llvm_profile_end_data(), + int ret = __llvm_profile_write_buffer_internal( + buf, __llvm_profile_begin_data(), __llvm_profile_end_data(), __llvm_profile_begin_counters(), __llvm_profile_end_counters(), + __llvm_profile_begin_bitmap(), __llvm_profile_end_bitmap(), __llvm_profile_begin_names(), __llvm_profile_end_names()); if (ret != 0) { diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst index be42733ca1405673e546643cffd13a79a2ef5e9a..f5e3c13ffbc8e6c77853f2b4f04d40073faeada0 100644 --- a/llvm/docs/CommandGuide/llvm-profdata.rst +++ b/llvm/docs/CommandGuide/llvm-profdata.rst @@ -195,8 +195,14 @@ OPTIONS .. option:: --debug-info= Specify the executable or ``.dSYM`` that contains debug info for the raw profile. - When ``-debug-info-correlate`` was used for instrumentation, use this option - to correlate the raw profile. + When ``--debug-info-correlate`` or ``--profile-correlate=debug-info`` was used + for instrumentation, use this option to correlate the raw profile. + +.. option:: --binary-file= + + Specify the executable that contains profile data and profile name sections for + the raw profile. When ``-profile-correlate=binary`` was used for + instrumentation, use this option to correlate the raw profile. .. option:: --temporal-profile-trace-reservoir-size @@ -346,8 +352,9 @@ OPTIONS .. option:: --debug-info= Specify the executable or ``.dSYM`` that contains debug info for the raw profile. - When ``-debug-info-correlate`` was used for instrumentation, use this option - to show the correlated functions from the raw profile. + When ``--debug-info-correlate`` or ``--profile-correlate=debug-info`` was used + for instrumentation, use this option to show the correlated functions from the + raw profile. .. 
option:: --covered diff --git a/llvm/docs/InstrProfileFormat.rst b/llvm/docs/InstrProfileFormat.rst new file mode 100644 index 0000000000000000000000000000000000000000..3b33c09f8c7a2f4cf3aa7c8a2400c9ec1ea35c20 --- /dev/null +++ b/llvm/docs/InstrProfileFormat.rst @@ -0,0 +1,531 @@ +=================================== +Instrumentation Profile Format +=================================== + +.. contents:: + :local: + + +Overview +========= + +Clang supports two types of profiling via instrumentation [1]_: frontend-based +and IR-based, and both could support a variety of use cases [2]_ . +This document describes two binary serialization formats (raw and indexed) to +store instrumented profiles with a specific emphasis on IRPGO use case, in the +sense that when specific header fields and payload sections have different ways +of interpretation across use cases, the documentation is based on IRPGO. + +.. note:: + Frontend-generated profiles are used together with coverage mapping for + `source-based code coverage`_. The `coverage mapping format`_ is different from + profile format. + +.. _`source-based code coverage`: https://clang.llvm.org/docs/SourceBasedCodeCoverage.html +.. _`coverage mapping format`: https://llvm.org/docs/CoverageMappingFormat.html + +Raw Profile Format +=================== + +The raw profile is generated by running the instrumented binary. The raw profile +data from an executable or a shared library [3]_ consists of a header and +multiple sections, with each section as a memory dump. The raw profile data needs +to be reasonably compact and fast to generate. + +There are no backward or forward version compatibility guarantees for the raw profile +format. That is, compilers and tools `require`_ a specific raw profile version +to parse the profiles. + +.. 
_`require`: https://github.com/llvm/llvm-project/blob/bffdde8b8e5d9a76a47949cd0f574f3ce656e181/llvm/lib/ProfileData/InstrProfReader.cpp#L551-L558 + +To feed profiles back into compilers for an optimized build (e.g., via +``-fprofile-use`` for IR instrumentation), a raw profile must be converted into +indexed format. + +General Storage Layout +----------------------- + +The storage layout of raw profile data format is illustrated below. Basically, +when the raw profile is read into a memory buffer, the actual byte offset of a +section is inferred from the section's order in the layout and size information +of all the sections ahead of it. + +:: + + +----+-----------------------+ + | | Magic | + | +-----------------------+ + | | Version | + | +-----------------------+ + H | Size Info for | + E | Section 1 | + A +-----------------------+ + D | Size Info for | + E | Section 2 | + R +-----------------------+ + | | ... | + | +-----------------------+ + | | Size Info for | + | | Section N | + +----+-----------------------+ + P | Section 1 | + A +-----------------------+ + Y | Section 2 | + L +-----------------------+ + O | ... | + A +-----------------------+ + D | Section N | + +----+-----------------------+ + + +.. note:: + Sections might be padded to meet specific alignment requirements. For + simplicity, header fields and data sections solely for padding purpose are + omitted in the data layout graph above and the rest of this document. + +Header +------- + +``Magic`` + Magic number encodes profile format (raw, indexed or text). For the raw format, + the magic number also encodes the endianness (big or little) and C pointer + size (4 or 8 bytes) of the platform on which the profile is generated. + + A factory method reads the magic number to construct reader properly and returns + error upon unrecognized format. 
Specifically, the factory method and raw profile + reader implementation make sure that a raw profile file could be read back on + a platform with the opposite endianness and/or the other C pointer size. + +``Version`` + The lower 32 bits specify the actual version and the most significant 32 bits + specify the variant types of the profile. IR-based instrumentation PGO and + context-sensitive IR-based instrumentation PGO are two variant types. + +``BinaryIdsSize`` + The byte size of `binary id`_ section. + +``NumData`` + The number of profile metadata. The byte size of `profile metadata`_ section + could be computed with this field. + +``NumCounter`` + The number of entries in the profile counter section. The byte size of `counter`_ + section could be computed with this field. + +``NumBitmapBytes`` + The number of bytes in the profile `bitmap`_ section. + +``NamesSize`` + The number of bytes in the name section. + +.. _`CountersDelta`: + +``CountersDelta`` + This field records the in-memory address difference between the `profile metadata`_ + and counter section in the instrumented binary, i.e., ``start(__llvm_prf_cnts) - start(__llvm_prf_data)``. + + It's used jointly with the `CounterPtr`_ field to compute the counter offset + relative to ``start(__llvm_prf_cnts)``. Check out calculation-of-counter-offset_ + for a visualized explanation. + + .. note:: + The ``__llvm_prf_data`` object file section might not be loaded into memory + when instrumented binary runs or might not get generated in the instrumented + binary in the first place. In those cases, ``CountersDelta`` is not used and + other mechanisms are used to match counters with instrumented code. See + `lightweight instrumentation`_ and `binary profile correlation`_ for examples. + +``BitmapDelta`` + This field records the in-memory address difference between the `profile metadata`_ + and bitmap section in the instrumented binary, i.e., ``start(__llvm_prf_bits) - start(__llvm_prf_data)``. 
+ + It's used jointly with the `BitmapPtr`_ to find the bitmap of a profile data + record, in a similar way to how counters are referenced as explained by + calculation-of-counter-offset_ . + + Similar to `CountersDelta`_ field, this field may not be used in non-PGO variants + of profiles. + +``NamesDelta`` + Records the in-memory address of name section. Not used except for raw profile + reader error checking. + +``NumVTables`` + Records the number of instrumented vtable entries in the binary. Used for + `type profiling`_. + +``VNamesSize`` + Records the byte size of the virtual table names section. Used for `type profiling`_. + +``ValueKindLast`` + Records the number of value kinds. Macro `VALUE_PROF_KIND`_ defines the value + kinds with a description of the kind. + +.. _`VALUE_PROF_KIND`: https://github.com/llvm/llvm-project/blob/7e405eb722e40c79b7726201d0f76b5dab34ba0f/compiler-rt/include/profile/InstrProfData.inc#L184-L186 + +Payload Sections +------------------ + +Binary Ids +^^^^^^^^^^^ +Stores the binary ids of the instrumented binaries to associate binaries with +profiles for source code coverage. See `binary id`_ RFC for the design. + +.. _`profile metadata`: + +Profile Metadata +^^^^^^^^^^^^^^^^^^ + +This section stores the metadata to map counters and value profiles back to +instrumented code regions (e.g., LLVM IR for IRPGO). + +The in-memory representation of the metadata is `__llvm_profile_data`_. +Some fields are used to reference data from other sections in the profile. +The fields are documented as follows: + +.. _`__llvm_profile_data`: https://github.com/llvm/llvm-project/blob/7c3b67d2038cfb48a80299089f6a1308eee1df7f/compiler-rt/include/profile/InstrProfData.inc#L65-L95 + +``NameRef`` + The MD5 of the function's PGO name. PGO name has the format + ``[<filepath><delimiter>]<mangled-name>`` where ``<filepath>`` and + ``<delimiter>`` are provided for local-linkage functions to tell possibly + identical functions. + +.. 
_FuncHash: + +``FuncHash`` + A checksum of the function's IR, taking control flow graph and instrumented + value sites into account. See `computeCFGHash`_ for details. + +.. _`computeCFGHash`: https://github.com/llvm/llvm-project/blob/7c3b67d2038cfb48a80299089f6a1308eee1df7f/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp#L616-L685 + +.. _`CounterPtr`: + +``CounterPtr`` + The in-memory address difference between profile data and the start of corresponding + counters. Counter position is stored this way (as a link-time constant) to reduce + instrumented binary size compared with snapshotting the address of symbols directly. + See `commit a1532ed`_ for further information. + +.. _`commit a1532ed`: https://github.com/llvm/llvm-project/commit/a1532ed27582038e2d9588108ba0fe8237f01844 + + .. note:: + ``CounterPtr`` might represent a different value for non-IRPGO use case. For + example, for `binary profile correlation`_, it represents the absolute address of counter. + When in doubt, check source code. + +.. _`BitmapPtr`: + +``BitmapPtr`` + The in-memory address difference between profile data and the start address of + corresponding bitmap. + + .. note:: + Similar to `CounterPtr`_, this field may represent a different value for non-IRPGO use case. + +``FunctionPointer`` + Records the function address when instrumented binary runs. This is used to + map the profiled callee address of indirect calls to the ``NameRef`` during + conversion from raw to indexed profiles. + +``Values`` + Represents value profiles in a two dimensional array. The number of elements + in the first dimension is the number of instrumented value sites across all + kinds. Each element in the first dimension is the head of a linked list, and + each element in the second dimension is a linked list element, carrying + ``<profiled-value, count>`` as payload. This is used by compiler runtime when + writing out value profiles. + + .. 
note:: + Value profiling is supported by frontend and IR PGO instrumentation, + but it's not supported in all cases (e.g., `lightweight instrumentation`_). + +``NumCounters`` + The number of counters for the instrumented function. + +``NumValueSites`` + This is an array of counters, and each counter represents the number of + instrumented sites for a kind of value in the function. + +``NumBitmapBytes`` + The number of bitmap bytes for the function. + +.. _`counter`: + +Profile Counters +^^^^^^^^^^^^^^^^^ + +For PGO [4]_, the counters within an instrumented function of a specific `FuncHash`_ +are stored contiguously and in an order that is consistent with instrumentation points selection. + +.. _calculation-of-counter-offset: + +As mentioned above, the recorded counter offset is relative to the profile metadata. +So how are function counters located in the raw profile data? + +Basically, the profile reader iterates profile metadata (from the `profile metadata`_ +section) and makes use of the recorded relative distances, as illustrated below. + +:: + + + --> start(__llvm_prf_data) --> +---------------------+ ------------+ + | | Data 1 | | + | +---------------------+ =====|| | + | | Data 2 | || | + | +---------------------+ || | + | | ... | || | + Counter| +---------------------+ || | + Delta | | Data N | || | + | +---------------------+ || | CounterPtr1 + | || | + | CounterPtr2 || | + | || | + | || | + + --> start(__llvm_prf_cnts) --> +---------------------+ || | + | ... | || | + +---------------------+ -----||----+ + | Counter for | || + | Data 1 | || + +---------------------+ || + | ... | || + +---------------------+ =====|| + | Counter for | + | Data 2 | + +---------------------+ + | ... | + +---------------------+ + | Counter for | + | Data N | + +---------------------+ + + +In the graph, + +* The profile header records ``CounterDelta`` with the value as ``start(__llvm_prf_cnts) - start(__llvm_prf_data)``. 
+ We will call it ``CounterDeltaInitVal`` below for convenience. +* For each profile data record ``ProfileDataN``, ``CounterPtr`` is recorded as + ``start(CounterN) - start(ProfileDataN)``, where ``ProfileDataN`` is the N-th + entry in ``__llvm_prf_data``, and ``CounterN`` represents the corresponding + profile counters. + +Each time the reader advances to the next data record, it `updates`_ ``CounterDelta`` +to minus the size of one ``ProfileData``. + +.. _`updates`: https://github.com/llvm/llvm-project/blob/17ff25a58ee4f29816d932fdb75f0d305718069f/llvm/include/llvm/ProfileData/InstrProfReader.h#L439-L444 + +For the counter corresponding to the first data record, the byte offset +relative to the start of the counter section is calculated as ``CounterPtr1 - CounterDeltaInitVal``. +When profile reader advances to the second data record, note ``CounterDelta`` +is updated to ``CounterDeltaInitVal - sizeof(ProfileData)``. +Thus the byte offset relative to the start of the counter section is calculated +as ``CounterPtr2 - (CounterDeltaInitVal - sizeof(ProfileData))``. + +.. _`bitmap`: + +Bitmap +^^^^^^^ +This section is used for source-based `Modified Condition/Decision Coverage`_ code coverage. Check out `Bitmap RFC`_ +for the design. + +.. _`Modified Condition/Decision Coverage`: https://en.wikipedia.org/wiki/Modified_condition/decision_coverage +.. _`Bitmap RFC`: https://discourse.llvm.org/t/rfc-source-based-mc-dc-code-coverage/59244 + +.. _`function names`: + +Names +^^^^^^ + +This section contains possibly compressed concatenated string of functions' PGO +names. If compressed, zlib library is used. + +Function names serve as keys in the PGO data hash table when raw profiles are +converted into indexed profiles. They are also crucial for ``llvm-profdata`` to +show the profiles in a human-readable way. + +Virtual Table Profile Data +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This section is used for `type profiling`_. 
Each entry corresponds to one virtual +table and is defined by the following C++ struct + +.. code-block:: c++ + + struct VTableProfData { + // The start address of the vtable, collected at runtime. + uint64_t StartAddress; + // The byte size of the vtable. `StartAddress` and `ByteSize` specifies an address range to look up. + uint32_t ByteSize; + // The hash of vtable's (PGO) name + uint64_t MD5HashOfName; + }; + +At profile use time, the compiler looks up a profiled address in the sorted vtable +address ranges and maps the address to a specific vtable through hashed name. + +Virtual Table Names +^^^^^^^^^^^^^^^^^^^^ + +This section is similar to `function names`_ section above, except it contains the PGO +names of profiled virtual tables. It's a standalone section such that raw profile +readers could directly find each name set by accessing the corresponding profile +data section. + +This section is stored in raw profiles such that `llvm-profdata` could show the +profiles in a human-readable way. + +Value Profile Data +^^^^^^^^^^^^^^^^^^^^ + +This section contains the profile data for value profiling. + +The value profiles corresponding to a profile metadata are serialized contiguously +as one record, and value profile records are stored in the same order as the +respective profile data, such that a raw profile reader `advances`_ the pointer to +profile data and the pointer to value profile records simutaneously [5]_ to find +value profiles for a per function, per `FuncHash`_ profile data. + +.. _`advances`: https://github.com/llvm/llvm-project/blob/7e15fa9161eda7497a5d6abf0d951a1d12d86550/llvm/include/llvm/ProfileData/InstrProfReader.h#L456-L457 + +Indexed Profile Format +=========================== + +Indexed profiles are generated from ``llvm-profdata``. In the indexed profiles, +function data are organized as on-disk hash table such that compilers can +look up profile data for functions in an IR module. 
+ +Compilers and tools must retain backward compatibility with indexed profiles. +That is, a tool or a compiler built at newer versions of code must understand +profiles generated by older tools or compilers. + +General Storage Layout +----------------------- + +The ASCII art depicts the general storage layout of indexed profiles. +Specifically, the indexed profile header describes the byte offset of individual +payload sections. + +:: + + +-----------------------+---+ + | Magic | | + +-----------------------+ | + | Version | | + +-----------------------+ | + | HashType | H + +-----------------------+ E + | Byte Offset | A + +------ | of section A | D + | +-----------------------+ E + | | Byte Of fset | R + +-----------| of section B | | + | | +-----------------------+ | + | | | ... | | + | | +-----------------------+ | + | | | Byte Offset | | + +---------------| of section Z | | + | | | +-----------------------+---+ + | | | | Profile Summary | | + | | | +-----------------------+ P + | | +------>| Section A | A + | | +-----------------------+ Y + | +---------->| Section B | L + | +-----------------------+ O + | | ... | A + | +-----------------------+ D + +-------------->| Section Z | | + +-----------------------+---+ + +.. note:: + + Profile summary section is at the beginning of payload. It's right after the + header so its position is implicitly known after reading the header. + +Header +-------- + +The `Header struct`_ is the source of truth and struct fields should explain +what's in the header. At a high level, `*Offset` fields record section byte +offsets, which are used by readers to locate interesting sections and skip +uninteresting ones. + +.. note:: + + To maintain backward compatibility of the indexed profiles, existing fields + shouldn't be deleted from struct definition; the field order shouldn't be + modified. New fields should be appended. + +.. 
_`Header struct`: https://github.com/llvm/llvm-project/blob/1a2960bab6381f2b288328e2371829b460ac020c/llvm/include/llvm/ProfileData/InstrProf.h#L1053-L1080 + + +Payload Sections +------------------ + +(CS) Profile Summary +^^^^^^^^^^^^^^^^^^^^^ +This section is right after profile header. It stores the serialized profile +summary. For context-sensitive IR-based instrumentation PGO, this section stores +an additional profile summary corresponding to the context-sensitive profiles. + +.. _`function data`: + +Function data +^^^^^^^^^^^^^^^^^^ +This section stores functions and their profiling data as an on-disk hash table. +Profile data for functions with the same name are grouped together and share one +hash table entry (the functions may come from different shared libraries for +instance). The profile data for them are organized as a sequence of key-value +pair where the key is `FuncHash`_, and the value is profiled information (represented +by `InstrProfRecord`_) for the function. + +.. _`InstrProfRecord`: https://github.com/llvm/llvm-project/blob/7e405eb722e40c79b7726201d0f76b5dab34ba0f/llvm/include/llvm/ProfileData/InstrProf.h#L693 + +MemProf Profile data +^^^^^^^^^^^^^^^^^^^^^^ +This section stores function's memory profiling data. See +`MemProf binary serialization format RFC`_ for the design. + +.. _`MemProf binary serialization format RFC`: https://lists.llvm.org/pipermail/llvm-dev/2021-September/153007.html + +Binary Ids +^^^^^^^^^^^^^^^^^^^^^^ +The section is used to carry on `binary id`_ information from raw profiles. + +Temporal Profile Traces +^^^^^^^^^^^^^^^^^^^^^^^^ +The section is used to carry on temporal profile information from raw profiles. +See `temporal profiling`_ for the design. + +Virtual Table Names +^^^^^^^^^^^^^^^^^^^^ +This section is used to store the names of vtables from raw profile in the indexed +profile. 
+ +Unlike function names which are stored as keys of `function data`_ hash table, +vtable names need to be stored in a standalone section in indexed profiles. +This way, `llvm-profdata` could show the profiled vtable information in a +human-readable way. + +Profile Data Usage +======================================= + +``llvm-profdata`` is the command line tool to display and process instrumentation- +based profile data. For supported usages, check out `llvm-profdata documentation `_. + +.. [1] For usage, see https://clang.llvm.org/docs/UsersManual.html#profiling-with-instrumentation +.. [2] For example, IR-based instrumentation supports `lightweight instrumentation`_ + and `temporal profiling`_. Frontend instrumentation could support `single-byte counters`_. +.. [3] A raw profile file could contain the concatenation of multiple raw + profiles, for example, from an executable and its shared libraries. Raw + profile reader could parse all raw profiles from the file correctly. +.. [4] The counter section is used by a few variant types (like temporal + profiling) and might have different semantics there. +.. [5] The step size of data pointer is the ``sizeof(ProfileData)``, and the step + size of value profile pointer is calcuated based on the number of collected + values. + +.. _`lightweight instrumentation`: https://groups.google.com/g/llvm-dev/c/r03Z6JoN7d4 +.. _`temporal profiling`: https://discourse.llvm.org/t/rfc-temporal-profiling-extension-for-irpgo/68068 +.. _`single-byte counters`: https://discourse.llvm.org/t/rfc-single-byte-counters-for-source-based-code-coverage/75685 +.. _`binary profile correlation`: https://discourse.llvm.org/t/rfc-add-binary-profile-correlation-to-not-load-profile-metadata-sections-into-memory-at-runtime/74565 +.. _`binary id`: https://lists.llvm.org/pipermail/llvm-dev/2021-June/151154.html +.. 
_`type profiling`: https://discourse.llvm.org/t/rfc-dynamic-type-profiling-and-optimizations-in-llvm/74600 diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 2f4d01c4cddd67bfe1e8d80387eca5ccd3d53473..e9caee30b1a399be848421c2660df09cd670a013 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -13820,6 +13820,144 @@ pass will generate the appropriate data structures and replace the ``llvm.instrprof.value.profile`` intrinsic with the call to the profile runtime library with proper arguments. +'``llvm.instrprof.mcdc.parameters``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.instrprof.mcdc.parameters(ptr , i64 , + i32 ) + +Overview: +""""""""" + +The '``llvm.instrprof.mcdc.parameters``' intrinsic is used to initiate MC/DC +code coverage instrumentation for a function. + +Arguments: +"""""""""" + +The first argument is a pointer to a global variable containing the +name of the entity being instrumented. This should generally be the +(mangled) function name for a set of counters. + +The second argument is a hash value that can be used by the consumer +of the profile data to detect changes to the instrumented source. + +The third argument is the number of bitmap bytes required by the function to +record the number of test vectors executed for each boolean expression. + +Semantics: +"""""""""" + +This intrinsic represents basic MC/DC parameters initiating one or more MC/DC +instrumentation sequences in a function. It will cause the ``-instrprof`` pass +to generate the appropriate data structures and the code to instrument MC/DC +test vectors in a format that can be written out by a compiler runtime and +consumed via the ``llvm-profdata`` tool. 
+ +'``llvm.instrprof.mcdc.condbitmap.update``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.instrprof.mcdc.condbitmap.update(ptr , i64 , + i32 , + ptr , + i1 ) + +Overview: +""""""""" + +The '``llvm.instrprof.mcdc.condbitmap.update``' intrinsic is used to track +MC/DC condition evaluation for each condition in a boolean expression. + +Arguments: +"""""""""" + +The first argument is a pointer to a global variable containing the +name of the entity being instrumented. This should generally be the +(mangled) function name for a set of counters. + +The second argument is a hash value that can be used by the consumer +of the profile data to detect changes to the instrumented source. + +The third argument is an ID of a condition to track. This value is used as a +bit index into the condition bitmap. + +The fourth argument is the address of the condition bitmap. + +The fifth argument is the boolean value representing the evaluation of the +condition (true or false) + +Semantics: +"""""""""" + +This intrinsic represents the update of a condition bitmap that is local to a +function and will cause the ``-instrprof`` pass to generate the code to +instrument the control flow around each condition in a boolean expression. The +ID of each condition corresponds to a bit index in the condition bitmap which +is set based on the evaluation of the condition. + +'``llvm.instrprof.mcdc.tvbitmap.update``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.instrprof.mcdc.tvbitmap.update(ptr , i64 , + i32 ) + i32 , + ptr ) + +Overview: +""""""""" + +The '``llvm.instrprof.mcdc.tvbitmap.update``' intrinsic is used to track MC/DC +test vector execution after each boolean expression has been fully executed. 
+The overall value of the condition bitmap, after it has been successively +updated using the '``llvm.instrprof.mcdc.condbitmap.update``' intrinsic with +the true or false evaluation of each condition, uniquely identifies an executed +MC/DC test vector and is used as a bit index into the global test vector +bitmap. + +Arguments: +"""""""""" + +The first argument is a pointer to a global variable containing the +name of the entity being instrumented. This should generally be the +(mangled) function name for a set of counters. + +The second argument is a hash value that can be used by the consumer +of the profile data to detect changes to the instrumented source. + +The third argument is the number of bitmap bytes required by the function to +record the number of test vectors executed for each boolean expression. + +The fourth argument is the byte index into the global test vector bitmap +corresponding to the function. + +The fifth argument is the address of the condition bitmap, which contains a +value representing an executed MC/DC test vector. It is loaded and used as the +bit index of the test vector bitmap. + +Semantics: +"""""""""" + +This intrinsic represents the final operation of an MC/DC instrumentation +sequence and will cause the ``-instrprof`` pass to generate the code to +instrument an update of a function's global test vector bitmap to indicate that +a test vector has been executed. The global test vector bitmap can be consumed +by the ``llvm-profdata`` and ``llvm-cov`` tools. 
+ '``llvm.thread.pointer``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h b/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h index 8a05e913a91063b7a21b2a565ad7ca32eff3c63a..9c2be12fce2fbb593e50ea7ca5401ace944932a6 100644 --- a/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h +++ b/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h @@ -57,10 +57,8 @@ public: /// /// The returned array space is owned by this class, and overwritten on /// subsequent calls. - ArrayRef - getPromotionCandidatesForInstruction(const Instruction *I, uint32_t &NumVals, - uint64_t &TotalCount, - uint32_t &NumCandidates); + MutableArrayRef getPromotionCandidatesForInstruction( + const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates); }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/llvm/include/llvm/Analysis/IndirectCallVisitor.h index 0825e19ecd2d24008ed832216c5163698405e1b8..549c6d9b9466dd4641b516f984bd25e2886e5d4c 100644 --- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h +++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h @@ -16,23 +16,80 @@ #include namespace llvm { -// Visitor class that finds all indirect call. +// Visitor class that finds indirect calls or instructions that gives vtable +// value, depending on Type. struct PGOIndirectCallVisitor : public InstVisitor { + enum class InstructionType { + kIndirectCall = 0, + kVTableVal = 1, + }; std::vector IndirectCalls; - PGOIndirectCallVisitor() = default; + std::vector ProfiledAddresses; + PGOIndirectCallVisitor(InstructionType Type) : Type(Type) {} + + // Given an indirect call instruction, try to find the the following pattern + // + // %vtable = load ptr, ptr %obj + // %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1 + // %2 = load ptr, ptr %vfn + // $call = tail call i32 %2 + // + // A heuristic is used to find the address feeding instructions. 
+ static Instruction *tryGetVTableInstruction(CallBase *CB) { + assert(CB != nullptr && "Caller guaranteed"); + LoadInst *LI = dyn_cast(CB->getCalledOperand()); + if (LI != nullptr) { + Value *FuncPtr = LI->getPointerOperand(); // GEP (or bitcast) + Value *VTablePtr = FuncPtr->stripInBoundsConstantOffsets(); + // This is a heuristic to find address feeding instructions. + // FIXME: Add support in the frontend so LLVM type intrinsics are + // emitted without LTO. This way, added intrinsics could filter + // non-vtable instructions and reduce instrumentation overhead. + // Since a non-vtable profiled address is not within the address + // range of vtable objects, it's stored as zero in indexed profiles. + // A pass that looks up symbol with an zero hash will (almost) always + // find nullptr and skip the actual transformation (e.g., comparison + // of symbols). So the performance overhead from non-vtable profiled + // address is negligible if exists at all. Comparing loaded address + // with symbol address guarantees correctness. + if (VTablePtr != nullptr && isa(VTablePtr)) + return cast(VTablePtr); + } + return nullptr; + } void visitCallBase(CallBase &Call) { - if (Call.isIndirectCall()) + if (Call.isIndirectCall()) { IndirectCalls.push_back(&Call); + + if (Type != InstructionType::kVTableVal) + return; + + Instruction *VPtr = + PGOIndirectCallVisitor::tryGetVTableInstruction(&Call); + if (VPtr) + ProfiledAddresses.push_back(VPtr); + } } + +private: + InstructionType Type; }; -// Helper function that finds all indirect call sites. 
inline std::vector findIndirectCalls(Function &F) { - PGOIndirectCallVisitor ICV; + PGOIndirectCallVisitor ICV( + PGOIndirectCallVisitor::InstructionType::kIndirectCall); ICV.visit(F); return ICV.IndirectCalls; } + +inline std::vector findVTableAddrs(Function &F) { + PGOIndirectCallVisitor ICV( + PGOIndirectCallVisitor::InstructionType::kVTableVal); + ICV.visit(F); + return ICV.ProfiledAddresses; +} + } // namespace llvm #endif diff --git a/llvm/include/llvm/Analysis/TypeMetadataUtils.h b/llvm/include/llvm/Analysis/TypeMetadataUtils.h index dab67aad1ab0eb7f4f7dd8f48e6b1e6e1dbfaa2d..8894945c28d94790b92d108aee34c7d9b603e56d 100644 --- a/llvm/include/llvm/Analysis/TypeMetadataUtils.h +++ b/llvm/include/llvm/Analysis/TypeMetadataUtils.h @@ -15,6 +15,7 @@ #define LLVM_ANALYSIS_TYPEMETADATAUTILS_H #include +#include namespace llvm { @@ -24,6 +25,7 @@ class CallInst; class Constant; class Function; class DominatorTree; +class GlobalVariable; class Instruction; class Module; @@ -77,6 +79,13 @@ void findDevirtualizableCallsForTypeCheckedLoad( Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M, Constant *TopLevelGlobal = nullptr); +/// Given a vtable and a specified offset, returns the function and the trivial +/// pointer at the specified offset in pair iff the pointer at the specified +/// offset is a function or an alias to a function. Returns a pair of nullptr +/// otherwise. +std::pair +getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset, Module &M); + /// Finds the same "relative pointer" pattern as described above, where the /// target is `F`, and replaces the entire pattern with a constant zero. 
void replaceRelativePointerUsersWithZero(Function *F); diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 9d638af6eeeffbccf58604455277deaa16e3cec3..b32bb6d1430264cd8c3db8c090b6ebf127823acf 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -4013,6 +4013,9 @@ public: unsigned getNumSuccessors() const { return 2; } + /// Updates profile metadata by scaling it by \p S / \p T. + void updateProfWeight(uint64_t S, uint64_t T); + // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return (I->getOpcode() == Instruction::Invoke); diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 62bd833198f022b2c015f4271b92460705080de7..f04b5ae152f2983361eaf36fd56a1274dccc74ce 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -1399,6 +1399,11 @@ public: ConstantInt *getHash() const { return cast(const_cast(getArgOperand(1))); } +}; + +/// A base class for all instrprof counter intrinsics. +class InstrProfCntrInstBase : public InstrProfInstBase { +public: // The number of counters for the instrumented function. ConstantInt *getNumCounters() const; // The index of the counter that this instruction acts on. @@ -1406,7 +1411,7 @@ public: }; /// This represents the llvm.instrprof.cover intrinsic. -class InstrProfCoverInst : public InstrProfInstBase { +class InstrProfCoverInst : public InstrProfCntrInstBase { public: static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::instrprof_cover; @@ -1417,7 +1422,7 @@ public: }; /// This represents the llvm.instrprof.increment intrinsic. 
-class InstrProfIncrementInst : public InstrProfInstBase { +class InstrProfIncrementInst : public InstrProfCntrInstBase { public: static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::instrprof_increment || @@ -1441,7 +1446,7 @@ public: }; /// This represents the llvm.instrprof.timestamp intrinsic. -class InstrProfTimestampInst : public InstrProfInstBase { +class InstrProfTimestampInst : public InstrProfCntrInstBase { public: static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::instrprof_timestamp; @@ -1452,7 +1457,7 @@ public: }; /// This represents the llvm.instrprof.value.profile intrinsic. -class InstrProfValueProfileInst : public InstrProfInstBase { +class InstrProfValueProfileInst : public InstrProfCntrInstBase { public: static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::instrprof_value_profile; @@ -1475,6 +1480,87 @@ public: } }; +/// A base class for instrprof mcdc intrinsics that require global bitmap bytes. +class InstrProfMCDCBitmapInstBase : public InstrProfInstBase { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::instrprof_mcdc_parameters || + I->getIntrinsicID() == Intrinsic::instrprof_mcdc_tvbitmap_update; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + + /// \return The number of bytes used for the MCDC bitmaps for the instrumented + /// function. + ConstantInt *getNumBitmapBytes() const { + return cast(const_cast(getArgOperand(2))); + } +}; + +/// This represents the llvm.instrprof.mcdc.parameters intrinsic. 
+class InstrProfMCDCBitmapParameters : public InstrProfMCDCBitmapInstBase { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::instrprof_mcdc_parameters; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.instrprof.mcdc.tvbitmap.update intrinsic. +class InstrProfMCDCTVBitmapUpdate : public InstrProfMCDCBitmapInstBase { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::instrprof_mcdc_tvbitmap_update; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + + /// \return The index of the TestVector Bitmap upon which this intrinsic + /// acts. + ConstantInt *getBitmapIndex() const { + return cast(const_cast(getArgOperand(3))); + } + + /// \return The address of the corresponding condition bitmap containing + /// the index of the TestVector to update within the TestVector Bitmap. + Value *getMCDCCondBitmapAddr() const { + return cast(const_cast(getArgOperand(4))); + } +}; + +/// This represents the llvm.instrprof.mcdc.condbitmap.update intrinsic. +/// It does not pertain to global bitmap updates or parameters and so doesn't +/// inherit from InstrProfMCDCBitmapInstBase. +class InstrProfMCDCCondBitmapUpdate : public InstrProfInstBase { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::instrprof_mcdc_condbitmap_update; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + + /// \return The ID of the condition to update. + ConstantInt *getCondID() const { + return cast(const_cast(getArgOperand(2))); + } + + /// \return The address of the corresponding condition bitmap. + Value *getMCDCCondBitmapAddr() const { + return cast(const_cast(getArgOperand(3))); + } + + /// \return The boolean value to set in the condition bitmap for the + /// corresponding condition ID. 
This represents how the condition evaluated. + Value *getCondBool() const { + return cast(const_cast(getArgOperand(4))); + } +}; + class PseudoProbeInst : public IntrinsicInst { public: static bool classof(const IntrinsicInst *I) { diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 20a8fa41946511b17a9406a16fe7a6b633b2ce8b..a47d94452db4b5a15027604bb2c11aa55451fc42 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -909,6 +909,21 @@ def int_instrprof_value_profile : Intrinsic<[], llvm_i64_ty, llvm_i32_ty, llvm_i32_ty]>; +// A parameter configuration for instrumentation based MCDC profiling. +def int_instrprof_mcdc_parameters : Intrinsic<[], + [llvm_ptr_ty, llvm_i64_ty, + llvm_i32_ty]>; + +// A test vector bitmap update for instrumentation based MCDC profiling. +def int_instrprof_mcdc_tvbitmap_update : Intrinsic<[], + [llvm_ptr_ty, llvm_i64_ty, + llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty]>; + +// A condition bitmap value update for instrumentation based MCDC profiling. +def int_instrprof_mcdc_condbitmap_update : Intrinsic<[], + [llvm_ptr_ty, llvm_i64_ty, + llvm_i32_ty, llvm_ptr_ty, llvm_i1_ty]>; + def int_call_preallocated_setup : DefaultAttrsIntrinsic<[llvm_token_ty], [llvm_i32_ty]>; def int_call_preallocated_arg : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_i32_ty]>; def int_call_preallocated_teardown : DefaultAttrsIntrinsic<[], [llvm_token_ty]>; diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h index 39165453de16b093d88ce0e3a8de386c453d4b72..865c364c778d098bf9f2c54cd0ac57fc950e031b 100644 --- a/llvm/include/llvm/IR/MDBuilder.h +++ b/llvm/include/llvm/IR/MDBuilder.h @@ -59,10 +59,12 @@ public: //===------------------------------------------------------------------===// /// Return metadata containing two branch weights. 
- MDNode *createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight); + MDNode *createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, + bool IsExpected = false); /// Return metadata containing a number of branch weights. - MDNode *createBranchWeights(ArrayRef Weights); + MDNode *createBranchWeights(ArrayRef Weights, + bool IsExpected = false); /// Return metadata specifying that a branch or switch is unpredictable. MDNode *createUnpredictable(); diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index 8019d0a3969b10ef42287cbffa8f6d648072f1ed..0bea517df832e37b4f6f4cfd0782a95d5420ddc1 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -55,6 +55,19 @@ MDNode *getBranchWeightMDNode(const Instruction &I); /// Nullptr otherwise. MDNode *getValidBranchWeightMDNode(const Instruction &I); +/// Check if Branch Weight Metadata has an "expected" field from an llvm.expect* +/// intrinsic +bool hasBranchWeightOrigin(const Instruction &I); + +/// Check if Branch Weight Metadata has an "expected" field from an llvm.expect* +/// intrinsic +bool hasBranchWeightOrigin(const MDNode *ProfileData); + +/// Return the offset to the first branch weight data +unsigned getBranchWeightOffset(const MDNode *ProfileData); + +unsigned getNumBranchWeights(const MDNode &ProfileData); + /// Extract branch weights from MD_prof metadata /// /// \param ProfileData A pointer to an MDNode. @@ -64,6 +77,16 @@ MDNode *getValidBranchWeightMDNode(const Instruction &I); bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl &Weights); +/// Faster version of extractBranchWeights() that skips checks and must only +/// be called with "branch_weights" metadata nodes. Supports uint32_t. +void extractFromBranchWeightMD32(const MDNode *ProfileData, + SmallVectorImpl &Weights); + +/// Faster version of extractBranchWeights() that skips checks and must only +/// be called with "branch_weights" metadata nodes. 
Supports uint64_t. +void extractFromBranchWeightMD64(const MDNode *ProfileData, + SmallVectorImpl &Weights); + /// Extract branch weights attatched to an Instruction /// /// \param I The Instruction to extract weights from. @@ -99,5 +122,16 @@ bool extractProfTotalWeight(const MDNode *ProfileData, uint64_t &TotalWeights); /// metadata was found. bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalWeights); +/// Create a new `branch_weights` metadata node and add or overwrite +/// a `prof` metadata reference to instruction `I`. +/// \param I the Instruction to set branch weights on. +/// \param Weights an array of weights to set on instruction I. +/// \param IsExpected were these weights added from an llvm.expect* intrinsic. +void setBranchWeights(Instruction &I, ArrayRef Weights, + bool IsExpected); + +/// Scaling the profile data attached to 'I' using the ratio of S/T. +void scaleProfData(Instruction &I, uint64_t S, uint64_t T); + } // namespace llvm #endif diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index 3c8f940ba97b7eb373ad2160f04910414eb79ca6..3e76ba7187f39495ac00103c7aaf6f0f4df8978f 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -75,7 +75,8 @@ inline std::error_code make_error_code(coveragemap_error E) { class CoverageMapError : public ErrorInfo { public: - CoverageMapError(coveragemap_error Err) : Err(Err) { + CoverageMapError(coveragemap_error Err, const Twine &ErrStr = Twine()) + : Err(Err), Msg(ErrStr.str()) { assert(Err != coveragemap_error::success && "Not an error"); } @@ -93,6 +94,7 @@ public: private: coveragemap_error Err; + std::string Msg; }; /// A Counter is an abstract value that describes how to compute the @@ -810,7 +812,7 @@ template Error getFuncNameViaRef(const FuncRecordTy *Record, InstrProfSymtab &ProfileNames, StringRef &FuncName) { uint64_t NameRef = 
getFuncNameRef(Record); - FuncName = ProfileNames.getFuncName(NameRef); + FuncName = ProfileNames.getFuncOrVarName(NameRef); return Error::success(); } @@ -1023,7 +1025,9 @@ enum CovMapVersion { // Compilation directory is stored separately and combined with relative // filenames to produce an absolute file path. Version6 = 5, - // The current version is Version6. + // Branch regions extended and Decision Regions added for MC/DC. + Version7 = 6, + // The current version is Version7. CurrentVersion = INSTR_PROF_COVMAP_VERSION }; diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h index 326c1b0d33384e381b0c8dc13e9375ff5dad1ba6..9ad4f889744359de126d52739f7f8a9756f34b52 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h @@ -184,7 +184,7 @@ public: private: std::vector Filenames; std::vector MappingRecords; - InstrProfSymtab ProfileNames; + std::unique_ptr ProfileNames; size_t CurrentRecord = 0; std::vector FunctionsFilenames; std::vector Expressions; @@ -195,8 +195,9 @@ private: // D69471, which can split up function records into multiple sections on ELF. 
FuncRecordsStorage FuncRecords; - BinaryCoverageReader(FuncRecordsStorage &&FuncRecords) - : FuncRecords(std::move(FuncRecords)) {} + BinaryCoverageReader(std::unique_ptr Symtab, + FuncRecordsStorage &&FuncRecords) + : ProfileNames(std::move(Symtab)), FuncRecords(std::move(FuncRecords)) {} public: BinaryCoverageReader(const BinaryCoverageReader &) = delete; @@ -209,12 +210,10 @@ public: SmallVectorImpl *BinaryIDs = nullptr); static Expected> - createCoverageReaderFromBuffer(StringRef Coverage, - FuncRecordsStorage &&FuncRecords, - InstrProfSymtab &&ProfileNames, - uint8_t BytesInAddress, - support::endianness Endian, - StringRef CompilationDir = ""); + createCoverageReaderFromBuffer( + StringRef Coverage, FuncRecordsStorage &&FuncRecords, + std::unique_ptr ProfileNamesPtr, uint8_t BytesInAddress, + support::endianness Endian, StringRef CompilationDir = ""); Error readNextRecord(CoverageMappingRecord &Record) override; }; diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index f64d2e6cb73924af543bc8da1f29cb4a2bf29a2e..d63f1de9697fafd8fa99546c76ab968285940692 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -17,6 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" @@ -90,12 +91,18 @@ inline StringRef getInstrProfValueProfMemOpFuncName() { /// Return the name prefix of variables containing instrumented function names. inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; } +/// Return the name prefix of variables containing virtual table profile data. +inline StringRef getInstrProfVTableVarPrefix() { return "__profvt_"; } + /// Return the name prefix of variables containing per-function control data. 
inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; } /// Return the name prefix of profile counter variables. inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; } +/// Return the name prefix of profile bitmap variables. +inline StringRef getInstrProfBitmapVarPrefix() { return "__profbm_"; } + /// Return the name prefix of value profile variables. inline StringRef getInstrProfValuesVarPrefix() { return "__profvp_"; } @@ -104,9 +111,9 @@ inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; } /// Return the name of the variable holding the strings (possibly compressed) /// of all functions' PGO names. -inline StringRef getInstrProfNamesVarName() { - return "__llvm_prf_nm"; -} +inline StringRef getInstrProfNamesVarName() { return "__llvm_prf_nm"; } + +inline StringRef getInstrProfVTableNamesVarName() { return "__llvm_prf_vnm"; } /// Return the name of a coverage mapping variable (internal linkage) /// for each instrumented source module. Such variables are allocated @@ -138,7 +145,8 @@ inline StringRef getInstrProfRegFuncName() { return "__llvm_profile_register_function"; } -/// Return the name of the runtime interface that registers the PGO name strings. +/// Return the name of the runtime interface that registers the PGO name +/// strings. inline StringRef getInstrProfNamesRegFuncName() { return "__llvm_profile_register_names_function"; } @@ -184,6 +192,19 @@ std::string getPGOFuncName(StringRef RawFuncName, StringRef FileName, uint64_t Version = INSTR_PROF_INDEX_VERSION); +/// \return the modified name for function \c F suitable to be +/// used as the key for IRPGO profile lookup. \c InLTO indicates if this is +/// called from LTO optimization passes. 
+std::string getIRPGOFuncName(const Function &F, bool InLTO = false); + +/// \return the filename and the function name parsed from the output of +/// \c getIRPGOFuncName() +std::pair getParsedIRPGOFuncName(StringRef IRPGOFuncName); + +/// \return the filename and the function name parsed from the output of +/// \c getIRPGOFuncName() +std::pair getParsedIRPGOName(StringRef IRPGOName); + /// Return the name of the global variable used to store a function /// name in PGO instrumentation. \c FuncName is the name of the function /// returned by the \c getPGOFuncName call. @@ -211,6 +232,18 @@ StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar); StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName = ""); +/// Given a vector of strings (names of global objects like functions or +/// virtual tables) \c NameStrs, the method generates a combined string \c +/// Result that is ready to be serialized. The \c Result string is comprised of +/// three fields: The first field is the length of the uncompressed strings, and +/// the second field is the length of the zlib-compressed string. Both +/// fields are encoded in ULEB128. If \c doCompression is false, the +/// third field is the uncompressed strings; otherwise it is the +/// compressed string. When the string compression is off, the +/// second field will have value zero. +Error collectGlobalObjectNameStrings(ArrayRef NameStrs, + bool doCompression, std::string &Result); + /// Given a vector of strings (function PGO names) \c NameStrs, the /// method generates a combined string \c Result that is ready to be /// serialized. The \c Result string is comprised of three fields: @@ -228,10 +261,8 @@ Error collectPGOFuncNameStrings(ArrayRef NameStrs, Error collectPGOFuncNameStrings(ArrayRef NameVars, std::string &Result, bool doCompression = true); -/// \c NameStrings is a string composed of one of more sub-strings encoded in -/// the format described above. 
The substrings are separated by 0 or more zero -/// bytes. This method decodes the string and populates the \c Symtab. -Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab); +Error collectVTableStrings(ArrayRef VTables, + std::string &Result, bool doCompression); /// Check if INSTR_PROF_RAW_VERSION_VAR is defined. This global is only being /// set in IR PGO compilation. @@ -262,7 +293,7 @@ void annotateValueSite(Module &M, Instruction &Inst, /// Extract the value profile data from \p Inst which is annotated with /// value profile meta data. Return false if there is no value data annotated, -/// otherwise return true. +/// otherwise return true. bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, @@ -270,19 +301,48 @@ bool getValueProfDataFromInst(const Instruction &Inst, uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue = false); +/// Extract the value profile data from \p Inst and returns them if \p Inst is +/// annotated with value profile data. Returns nullptr otherwise. It's similar +/// to `getValueProfDataFromInst` above except that an array is allocated only +/// after a preliminary checking that the value profiles of kind `ValueKind` +/// exist. +std::unique_ptr +getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, + uint32_t MaxNumValueData, uint32_t &ActualNumValueData, + uint64_t &TotalC, bool GetNoICPValue = false); + +// TODO: Unify metadata name 'PGOFuncName' and 'PGOName', by supporting read +// of this metadata for backward compatibility and generating 'PGOName' only. +/// Extract the value profile data from \p Inst and returns them if \p Inst is +/// annotated with value profile data. Returns an empty vector otherwise. 
+SmallVector +getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, + uint32_t MaxNumValueData, uint64_t &TotalC, + bool GetNoICPValue = false); + inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; } +inline StringRef getPGONameMetadataName() { return "PGOName"; } + /// Return the PGOFuncName meta data associated with a function. MDNode *getPGOFuncNameMetadata(const Function &F); +std::string getPGOName(const GlobalVariable &V, bool InLTO = false); + /// Create the PGOFuncName meta data if PGOFuncName is different from /// function's raw name. This should only apply to internal linkage functions /// declared by users only. +/// TODO: Update all callers to 'createPGONameMetadata' and deprecate this +/// function. void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName); +/// Create the PGOName metadata if a global object's PGO name is different from +/// its mangled name. This should apply to local-linkage global objects only. +void createPGONameMetadata(GlobalObject &GO, StringRef PGOName); + /// Check if we can use Comdat for profile variables. This will eliminate /// the duplicated profile variables for Comdat functions. -bool needsComdatForCounter(const Function &F, const Module &M); +bool needsComdatForCounter(const GlobalObject &GV, const Module &M); /// An enum describing the attributes of an instrumented profile. 
enum class InstrProfKind { @@ -319,20 +379,22 @@ enum class instrprof_error { too_large, truncated, malformed, - missing_debug_info_for_correlation, - unexpected_debug_info_for_correlation, + missing_correlation_info, + unexpected_correlation_info, unable_to_correlate_profile, unknown_function, invalid_prof, hash_mismatch, count_mismatch, + bitmap_mismatch, counter_overflow, value_site_count_mismatch, compress_failed, uncompress_failed, empty_raw_profile, zlib_unavailable, - raw_profile_version_mismatch + raw_profile_version_mismatch, + counter_value_too_large, }; /// An ordered list of functions identified by their NameRef found in @@ -416,23 +478,59 @@ public: using AddrHashMap = std::vector>; private: + using AddrIntervalMap = + IntervalMap>; StringRef Data; uint64_t Address = 0; - // Unique name strings. + // Unique name strings. Used to ensure entries in MD5NameMap (a vector that's + // going to be sorted) has unique MD5 keys in the first place. StringSet<> NameTab; + // Records the unique virtual table names. This is used by InstrProfWriter to + // write out an on-disk chained hash table of virtual table names. + // InstrProfWriter stores per function profile data (keyed by function names) + // so it doesn't use a StringSet for function names. + StringSet<> VTableNames; // A map from MD5 keys to function name strings. std::vector> MD5NameMap; + // A map from MD5 keys to function define. We only populate this map // when build the Symtab from a Module. std::vector> MD5FuncMap; + // A map from MD5 to the global variable. This map is only populated when + // building the symtab from a module. Use separate container instances for + // `MD5FuncMap` and `MD5VTableMap`. + // TODO: Unify the container type and the lambda function 'mapName' inside + // add{Func,VTable}WithName. + DenseMap MD5VTableMap; // A map from function runtime address to function name MD5 hash. // This map is only populated and used by raw instr profile reader. 
AddrHashMap AddrToMD5Map; + + AddrIntervalMap::Allocator VTableAddrMapAllocator; + // This map is only populated and used by raw instr profile reader. + AddrIntervalMap VTableAddrMap; bool Sorted = false; - static StringRef getExternalSymbol() { - return "** External Symbol **"; - } + static StringRef getExternalSymbol() { return "** External Symbol **"; } + + // Returns the canonical name of the given PGOName. In a canonical name, all + // suffixes that begin with "." except ".__uniq." are stripped. + // FIXME: Unify this with `FunctionSamples::getCanonicalFnName`. + static StringRef getCanonicalName(StringRef PGOName); + + // Add the function into the symbol table, by creating the following + // map entries: + // name-set = {PGOFuncName} union {getCanonicalName(PGOFuncName)} + // - In MD5NameMap: for name in name-set + // - In MD5FuncMap: for name in name-set + Error addFuncWithName(Function &F, StringRef PGOFuncName); + + // Add the vtable into the symbol table, by creating the following + // map entries: + // name-set = {PGOVTableName} union {getCanonicalName(PGOVTableName)} + // - In MD5NameMap: for name in name-set + // - In MD5VTableMap: for name in name-set + Error addVTableWithName(GlobalVariable &V, StringRef PGOVTableName); // If the symtab is created by a series of calls to \c addFuncName, \c // finalizeSymtab needs to be called before looking up function names. @@ -441,7 +539,14 @@ private: inline void finalizeSymtab(); public: - InstrProfSymtab() = default; + InstrProfSymtab() : VTableAddrMap(VTableAddrMapAllocator) {} + + // Not copyable or movable. + // Consider std::unique_ptr for move. + InstrProfSymtab(const InstrProfSymtab &) = delete; + InstrProfSymtab &operator=(const InstrProfSymtab &) = delete; + InstrProfSymtab(InstrProfSymtab &&) = delete; + InstrProfSymtab &operator=(InstrProfSymtab &&) = delete; /// Create InstrProfSymtab from an object file section which /// contains function PGO names. 
When section may contain raw @@ -451,15 +556,25 @@ public: /// until before it is used. See also \c create(StringRef) method. Error create(object::SectionRef &Section); + /// \c NameStrings is a string composed of one of more sub-strings + /// encoded in the format described in \c collectPGOFuncNameStrings. + /// This method is a wrapper to \c readAndDecodeStrings method. + Error create(StringRef NameStrings); + + /// Initialize symtab states with function names and vtable names. \c + /// FuncNameStrings is a string composed of one or more encoded function name + /// strings, and \c VTableNameStrings composes of one or more encoded vtable + /// names. This interface is solely used by raw profile reader. + Error create(StringRef FuncNameStrings, StringRef VTableNameStrings); + + /// Initialize 'this' with the set of vtable names encoded in + /// \c CompressedVTableNames. + Error initVTableNamesFromCompressedStrings(StringRef CompressedVTableNames); + /// This interface is used by reader of CoverageMapping test /// format. inline Error create(StringRef D, uint64_t BaseAddr); - /// \c NameStrings is a string composed of one of more sub-strings - /// encoded in the format described in \c collectPGOFuncNameStrings. - /// This method is a wrapper to \c readPGOFuncNameStrings method. - inline Error create(StringRef NameStrings); - /// A wrapper interface to populate the PGO symtab with functions /// decls from module \c M. This interface is used by transformation /// passes such as indirect function call promotion. Variable \c InLTO @@ -468,37 +583,79 @@ public: /// Create InstrProfSymtab from a set of names iteratable from /// \p IterRange. This interface is used by IndexedProfReader. - template Error create(const NameIterRange &IterRange); - - /// Update the symtab by adding \p FuncName to the table. This interface - /// is used by the raw and text profile readers. 
- Error addFuncName(StringRef FuncName) { - if (FuncName.empty()) + template + Error create(const NameIterRange &IterRange); + + /// Create InstrProfSymtab from a set of function names and vtable + /// names iteratable from \p IterRange. This interface is used by + /// IndexedProfReader. + template + Error create(const FuncNameIterRange &FuncIterRange, + const VTableNameIterRange &VTableIterRange); + + // Map the MD5 of the symbol name to the name. + Error addSymbolName(StringRef SymbolName) { + if (SymbolName.empty()) return make_error(instrprof_error::malformed, - "function name is empty"); - auto Ins = NameTab.insert(FuncName); + "symbol name is empty"); + + // Insert into NameTab so that MD5NameMap (a vector that will be sorted) + // won't have duplicated entries in the first place. + auto Ins = NameTab.insert(SymbolName); if (Ins.second) { MD5NameMap.push_back(std::make_pair( - IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey())); + IndexedInstrProf::ComputeHash(SymbolName), Ins.first->getKey())); Sorted = false; } return Error::success(); } + /// The method name is kept since there are many callers. + /// It just forwards to 'addSymbolName'. + Error addFuncName(StringRef FuncName) { return addSymbolName(FuncName); } + + /// Adds VTableName as a known symbol, and inserts it to a map that + /// tracks all vtable names. + Error addVTableName(StringRef VTableName) { + if (Error E = addSymbolName(VTableName)) + return E; + + // Record VTableName. InstrProfWriter uses this set. The comment around + // class member explains why. + VTableNames.insert(VTableName); + return Error::success(); + } + + const StringSet<> &getVTableNames() const { return VTableNames; } + /// Map a function address to its name's MD5 hash. This interface /// is only used by the raw profiler reader. 
void mapAddress(uint64_t Addr, uint64_t MD5Val) { AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val)); } + /// Map the address range (i.e., [start_address, end_address)) of a variable + /// to its names' MD5 hash. This interface is only used by the raw profile + /// reader. + void mapVTableAddress(uint64_t StartAddr, uint64_t EndAddr, uint64_t MD5Val) { + VTableAddrMap.insert(StartAddr, EndAddr, MD5Val); + } + /// Return a function's hash, or 0, if the function isn't in this SymTab. uint64_t getFunctionHashFromAddress(uint64_t Address); + /// Return a vtable's hash, or 0 if the vtable doesn't exist in this SymTab. + uint64_t getVTableHashFromAddress(uint64_t Address); + /// Return function's PGO name from the function name's symbol /// address in the object file. If an error occurs, return /// an empty string. StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize); + /// Return name of functions or global variables from the name's md5 hash + /// value. If not found, return an empty string. + inline StringRef getFuncOrVarName(uint64_t ValMD5Hash); + /// Return function's PGO name from the name's md5 hash value. /// If not found, return an empty string. inline StringRef getFuncName(uint64_t FuncMD5Hash); @@ -508,6 +665,11 @@ public: /// will be represented using the same StringRef value. inline StringRef getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash); + /// Just like getFuncOrVarName, except that it will return literal string + /// 'External Symbol' if the function or global variable is external to + /// this symbol table. + inline StringRef getFuncOrVarNameIfDefined(uint64_t ValMD5Hash); + /// True if Symbol is the value used to represent external symbols. static bool isExternalSymbol(const StringRef &Symbol) { return Symbol == InstrProfSymtab::getExternalSymbol(); @@ -521,6 +683,10 @@ public: /// global functions, it returns the same string as getFuncName. 
inline StringRef getOrigFuncName(uint64_t FuncMD5Hash); + /// Return the global variable corresponding to md5 hash. Return nullptr if + /// not found. + inline GlobalVariable *getGlobalVariable(uint64_t MD5Hash); + /// Return the name section data. inline StringRef getNameData() const { return Data; } @@ -534,10 +700,6 @@ Error InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) { return Error::success(); } -Error InstrProfSymtab::create(StringRef NameStrings) { - return readPGOFuncNameStrings(NameStrings, *this); -} - template Error InstrProfSymtab::create(const NameIterRange &IterRange) { for (auto Name : IterRange) @@ -548,6 +710,24 @@ Error InstrProfSymtab::create(const NameIterRange &IterRange) { return Error::success(); } +template +Error InstrProfSymtab::create(const FuncNameIterRange &FuncIterRange, + const VTableNameIterRange &VTableIterRange) { + // Iterate elements by StringRef rather than by const reference. + // StringRef is small enough, so the loop is efficient whether + // element in the range is std::string or StringRef. 
+ for (StringRef Name : FuncIterRange) + if (Error E = addFuncName(Name)) + return E; + + for (StringRef VTableName : VTableIterRange) + if (Error E = addVTableName(VTableName)) + return E; + + finalizeSymtab(); + return Error::success(); +} + void InstrProfSymtab::finalizeSymtab() { if (Sorted) return; @@ -559,6 +739,23 @@ void InstrProfSymtab::finalizeSymtab() { Sorted = true; } +StringRef InstrProfSymtab::getFuncOrVarNameIfDefined(uint64_t MD5Hash) { + StringRef ret = getFuncOrVarName(MD5Hash); + if (ret.empty()) + return InstrProfSymtab::getExternalSymbol(); + return ret; +} + +StringRef InstrProfSymtab::getFuncOrVarName(uint64_t MD5Hash) { + finalizeSymtab(); + auto Result = llvm::lower_bound(MD5NameMap, MD5Hash, + [](const std::pair &LHS, + uint64_t RHS) { return LHS.first < RHS; }); + if (Result != MD5NameMap.end() && Result->first == MD5Hash) + return Result->second; + return StringRef(); +} + StringRef InstrProfSymtab::getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash) { StringRef ret = getFuncName(FuncMD5Hash); if (ret.empty()) @@ -596,6 +793,10 @@ StringRef InstrProfSymtab::getOrigFuncName(uint64_t FuncMD5Hash) { return PGOName.drop_front(S + 1); } +GlobalVariable *InstrProfSymtab::getGlobalVariable(uint64_t MD5Hash) { + return MD5VTableMap.lookup(MD5Hash); +} + // To store the sums of profile count values, or the percentage of // the sums of the total count values. struct CountSumOrPercent { @@ -694,18 +895,23 @@ struct InstrProfValueSiteRecord { /// Profiling information for a single function. 
struct InstrProfRecord { std::vector Counts; + std::vector BitmapBytes; InstrProfRecord() = default; InstrProfRecord(std::vector Counts) : Counts(std::move(Counts)) {} + InstrProfRecord(std::vector Counts, + std::vector BitmapBytes) + : Counts(std::move(Counts)), BitmapBytes(std::move(BitmapBytes)) {} InstrProfRecord(InstrProfRecord &&) = default; InstrProfRecord(const InstrProfRecord &RHS) - : Counts(RHS.Counts), + : Counts(RHS.Counts), BitmapBytes(RHS.BitmapBytes), ValueData(RHS.ValueData ? std::make_unique(*RHS.ValueData) : nullptr) {} InstrProfRecord &operator=(InstrProfRecord &&) = default; InstrProfRecord &operator=(const InstrProfRecord &RHS) { Counts = RHS.Counts; + BitmapBytes = RHS.BitmapBytes; if (!RHS.ValueData) { ValueData = nullptr; return *this; @@ -817,6 +1023,7 @@ private: struct ValueProfData { std::vector IndirectCallSites; std::vector MemOPSizes; + std::vector VTableTargets; }; std::unique_ptr ValueData; @@ -839,6 +1046,8 @@ private: return ValueData->IndirectCallSites; case IPVK_MemOPSize: return ValueData->MemOPSizes; + case IPVK_VTableTarget: + return ValueData->VTableTargets; default: llvm_unreachable("Unknown value kind!"); } @@ -853,6 +1062,8 @@ private: return ValueData->IndirectCallSites; case IPVK_MemOPSize: return ValueData->MemOPSizes; + case IPVK_VTableTarget: + return ValueData->VTableTargets; default: llvm_unreachable("Unknown value kind!"); } @@ -884,6 +1095,11 @@ struct NamedInstrProfRecord : InstrProfRecord { NamedInstrProfRecord(StringRef Name, uint64_t Hash, std::vector Counts) : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {} + NamedInstrProfRecord(StringRef Name, uint64_t Hash, + std::vector Counts, + std::vector BitmapBytes) + : InstrProfRecord(std::move(Counts), std::move(BitmapBytes)), Name(Name), + Hash(Hash) {} static bool hasCSFlagInHash(uint64_t FuncHash) { return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1); @@ -1019,7 +1235,11 @@ enum ProfVersion { Version9 = 9, // An additional (optional) temporal profile 
traces section is added. Version10 = 10, - // The current version is 10. + // An additional field is used for bitmap bytes. + Version11 = 11, + // VTable profiling, + Version12 = 12, + // The current version is 12. CurrentVersion = INSTR_PROF_INDEX_VERSION }; const uint64_t Version = ProfVersion::CurrentVersion; @@ -1039,6 +1259,7 @@ struct Header { uint64_t MemProfOffset; uint64_t BinaryIdOffset; uint64_t TemporalProfTracesOffset; + uint64_t VTableNamesOffset; // New fields should only be added at the end to ensure that the size // computation is correct. The methods below need to be updated to ensure that // the new field is read correctly. @@ -1157,6 +1378,7 @@ namespace RawInstrProf { // Version 6: Added binary id. // Version 7: Reorder binary id and include version in signature. // Version 8: Use relative counter pointer. +// Version 9: Added relative bitmap bytes pointer and count used by MC/DC. const uint64_t Version = INSTR_PROF_RAW_VERSION; template inline uint64_t getMagic(); @@ -1174,8 +1396,13 @@ template <> inline uint64_t getMagic() { // It should also match the synthesized type in // Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters. template struct alignas(8) ProfileData { - #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name; - #include "llvm/ProfileData/InstrProfData.inc" +#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name; +#include "llvm/ProfileData/InstrProfData.inc" +}; + +template struct alignas(8) VTableProfileData { +#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Type Name; +#include "llvm/ProfileData/InstrProfData.inc" }; // File header structure of the LLVM profile data in raw format. 
diff --git a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h index 2e26a21e8839ba9dc1c95206f1cf775afd6225cd..fa4dc46dba90fbf1b21f0574de15adfe61a42d81 100644 --- a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h +++ b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h @@ -5,8 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// This file defines InstrProfCorrelator used to generate PGO profiles from -// raw profile data and debug info. +// This file defines InstrProfCorrelator used to generate PGO/coverage profiles +// from raw profile data and debug info/binary file. //===----------------------------------------------------------------------===// #ifndef LLVM_PROFILEDATA_INSTRPROFCORRELATOR_H @@ -31,15 +31,19 @@ class ObjectFile; /// to their functions. class InstrProfCorrelator { public: + /// Indicate if we should use the debug info or profile metadata sections to + /// correlate. + enum ProfCorrelatorKind { NONE, DEBUG_INFO, BINARY }; + static llvm::Expected> - get(StringRef DebugInfoFilename); + get(StringRef Filename, ProfCorrelatorKind FileKind); /// Construct a ProfileData vector used to correlate raw instrumentation data /// to their functions. - virtual Error correlateProfileData() = 0; + virtual Error correlateProfileData(int MaxWarnings) = 0; /// Process debug info and dump the correlation data. - virtual Error dumpYaml(raw_ostream &OS) = 0; + virtual Error dumpYaml(int MaxWarnings, raw_ostream &OS) = 0; /// Return the number of ProfileData elements. std::optional getDataSize() const; @@ -66,11 +70,18 @@ public: protected: struct Context { static llvm::Expected> - get(std::unique_ptr Buffer, const object::ObjectFile &Obj); + get(std::unique_ptr Buffer, const object::ObjectFile &Obj, + ProfCorrelatorKind FileKind); std::unique_ptr Buffer; /// The address range of the __llvm_prf_cnts section. 
uint64_t CountersSectionStart; uint64_t CountersSectionEnd; + /// The pointer points to start/end of profile data/name sections if + /// FileKind is Binary. + const char *DataStart; + const char *DataEnd; + const char *NameStart; + size_t NameSize; /// True if target and host have different endian orders. bool ShouldSwapBytes; }; @@ -102,7 +113,7 @@ protected: private: static llvm::Expected> - get(std::unique_ptr Buffer); + get(std::unique_ptr Buffer, ProfCorrelatorKind FileKind); const InstrProfCorrelatorKind Kind; }; @@ -126,30 +137,34 @@ public: static llvm::Expected>> get(std::unique_ptr Ctx, - const object::ObjectFile &Obj); + const object::ObjectFile &Obj, ProfCorrelatorKind FileKind); protected: std::vector> Data; - Error correlateProfileData() override; + Error correlateProfileData(int MaxWarnings) override; virtual void correlateProfileDataImpl( + int MaxWarnings, InstrProfCorrelator::CorrelationData *Data = nullptr) = 0; - Error dumpYaml(raw_ostream &OS) override; + virtual Error correlateProfileNameImpl() = 0; + + Error dumpYaml(int MaxWarnings, raw_ostream &OS) override; + + void addDataProbe(uint64_t FunctionName, uint64_t CFGHash, + IntPtrT CounterOffset, IntPtrT FunctionPtr, + uint32_t NumCounters); - void addProbe(StringRef FunctionName, uint64_t CFGHash, IntPtrT CounterOffset, - IntPtrT FunctionPtr, uint32_t NumCounters); + // Byte-swap the value if necessary. + template T maybeSwap(T Value) const { + return Ctx->ShouldSwapBytes ? llvm::byteswap(Value) : Value; + } private: InstrProfCorrelatorImpl(InstrProfCorrelatorKind Kind, std::unique_ptr Ctx) : InstrProfCorrelator(Kind, std::move(Ctx)){}; llvm::DenseSet CounterOffsets; - - // Byte-swap the value if necessary. - template T maybeSwap(T Value) const { - return Ctx->ShouldSwapBytes ? 
sys::getSwappedBytes(Value) : Value; - } }; /// DwarfInstrProfCorrelator - A child of InstrProfCorrelatorImpl that takes @@ -198,7 +213,32 @@ private: /// NULL /// \endcode void correlateProfileDataImpl( + int MaxWarnings, + InstrProfCorrelator::CorrelationData *Data = nullptr) override; + + Error correlateProfileNameImpl() override; +}; + +/// BinaryInstrProfCorrelator - A child of InstrProfCorrelatorImpl that +/// takes an object file as input to correlate profiles. +template +class BinaryInstrProfCorrelator : public InstrProfCorrelatorImpl { +public: + BinaryInstrProfCorrelator(std::unique_ptr Ctx) + : InstrProfCorrelatorImpl(std::move(Ctx)) {} + + /// Return a pointer to the names string that this class constructs. + const char *getNamesPointer() const { return this->Ctx.NameStart; } + + /// Return the number of bytes in the names string. + size_t getNamesSize() const { return this->Ctx.NameSize; } + +private: + void correlateProfileDataImpl( + int MaxWarnings, InstrProfCorrelator::CorrelationData *Data = nullptr) override; + + Error correlateProfileNameImpl() override; }; } // end namespace llvm diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index 94261f4705b96c4ba17d81da46ddbead1815c873..ea5330254e2901d88ccefbe8fd84e423c8204fce 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -76,6 +76,7 @@ INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \ ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \ Inc->getHash()->getZExtValue())) INSTR_PROF_DATA(const IntPtrT, IntPtrTy, CounterPtr, RelativeCounterPtr) +INSTR_PROF_DATA(const IntPtrT, IntPtrTy, BitmapPtr, RelativeBitmapPtr) /* This is used to map function pointers for the indirect call targets to * function name hashes during the conversion from raw to merged profile * data. 
@@ -87,10 +88,31 @@ INSTR_PROF_DATA(IntPtrT, llvm::Type::getInt8PtrTy(Ctx), Values, \ INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumCounters, \ ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumCounters)) INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \ - ConstantArray::get(Int16ArrayTy, Int16ArrayVals)) + ConstantArray::get(Int16ArrayTy, Int16ArrayVals)) \ +INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumBitmapBytes, \ + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumBitmapBytes)) #undef INSTR_PROF_DATA /* INSTR_PROF_DATA end. */ +/* For a virtual table object, record the name hash to associate profiled + * addresses with global variables, and record {starting address, size in bytes} + * to map the profiled virtual table (which usually have an offset from the + * starting address) back to a virtual table object. */ +#ifndef INSTR_PROF_VTABLE_DATA +#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer) +#else +#define INSTR_PROF_VTABLE_DATA_DEFINED +#endif +INSTR_PROF_VTABLE_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), \ + VTableNameHash, ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \ + IndexedInstrProf::ComputeHash(PGOVTableName))) +INSTR_PROF_VTABLE_DATA(const IntPtrT, llvm::PointerType::getUnqual(Ctx), \ + VTablePointer, VTableAddr) +INSTR_PROF_VTABLE_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), VTableSize, \ + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), \ + VTableSizeVal)) +#undef INSTR_PROF_VTABLE_DATA +/* INSTR_PROF_VTABLE_DATA end. */ /* This is an internal data structure used by value profiler. 
It * is defined here to allow serialization code sharing by LLVM @@ -128,16 +150,20 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \ INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) -/* FIXME: A more accurate name is NumData */ -INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) +INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters) -/* FIXME: A more accurate name is NumCounters */ -INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) +INSTR_PROF_RAW_HEADER(uint64_t, NumCounters, NumCounters) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesAfterCounters, PaddingBytesAfterCounters) +INSTR_PROF_RAW_HEADER(uint64_t, NumBitmapBytes, NumBitmapBytes) +INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesAfterBitmapBytes, PaddingBytesAfterBitmapBytes) INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin - (uintptr_t)DataBegin) +INSTR_PROF_RAW_HEADER(uint64_t, BitmapDelta, + (uintptr_t)BitmapBegin - (uintptr_t)DataBegin) INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) +INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables) +INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize) INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) #undef INSTR_PROF_RAW_HEADER /* INSTR_PROF_RAW_HEADER end */ @@ -179,13 +205,26 @@ VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0, "indirect call target") /* For memory intrinsic functions size profiling. 
*/ VALUE_PROF_KIND(IPVK_MemOPSize, 1, "memory intrinsic functions size") +/* For virtual table address profiling, the address point of the virtual table + * (i.e., the address contained in objects pointing to a virtual table) are + * profiled. Note this may not be the address of the per C++ class virtual table + * object (e.g., there might be an offset). + * + * The profiled addresses are stored in raw profile, together with the following + * two types of information. + * 1. The (starting and ending) addresses of per C++ class virtual table objects. + * 2. The (compressed) virtual table object names. + * RawInstrProfReader converts profiled virtual table addresses to virtual table + * objects' MD5 hash. + */ +VALUE_PROF_KIND(IPVK_VTableTarget, 2, "The profiled address point of the vtable") /* These two kinds must be the last to be * declared. This is to make sure the string * array created with the template can be * indexed with the kind value. */ VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget, "first") -VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize, "last") +VALUE_PROF_KIND(IPVK_Last, IPVK_VTableTarget, "last") #undef VALUE_PROF_KIND /* VALUE_PROF_KIND end */ @@ -269,15 +308,24 @@ INSTR_PROF_SECT_ENTRY(IPSK_data, \ INSTR_PROF_SECT_ENTRY(IPSK_cnts, \ INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON), \ INSTR_PROF_CNTS_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_bitmap, \ + INSTR_PROF_QUOTE(INSTR_PROF_BITS_COMMON), \ + INSTR_PROF_BITS_COFF, "__DATA,") INSTR_PROF_SECT_ENTRY(IPSK_name, \ INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \ INSTR_PROF_NAME_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_vname, \ + INSTR_PROF_QUOTE(INSTR_PROF_VNAME_COMMON), \ + INSTR_PROF_VNAME_COFF, "__DATA,") INSTR_PROF_SECT_ENTRY(IPSK_vals, \ INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON), \ INSTR_PROF_VALS_COFF, "__DATA,") INSTR_PROF_SECT_ENTRY(IPSK_vnodes, \ INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON), \ INSTR_PROF_VNODES_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_vtab, \ + 
INSTR_PROF_QUOTE(INSTR_PROF_VTAB_COMMON), \ + INSTR_PROF_VTAB_COFF, "__DATA,") INSTR_PROF_SECT_ENTRY(IPSK_covmap, \ INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON), \ INSTR_PROF_COVMAP_COFF, "__LLVM_COV,") @@ -287,6 +335,12 @@ INSTR_PROF_SECT_ENTRY(IPSK_covfun, \ INSTR_PROF_SECT_ENTRY(IPSK_orderfile, \ INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_COMMON), \ INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_COFF), "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_covdata, \ + INSTR_PROF_QUOTE(INSTR_PROF_COVDATA_COMMON), \ + INSTR_PROF_COVDATA_COFF, "__LLVM_COV,") +INSTR_PROF_SECT_ENTRY(IPSK_covname, \ + INSTR_PROF_QUOTE(INSTR_PROF_COVNAME_COMMON), \ + INSTR_PROF_COVNAME_COFF, "__LLVM_COV,") #undef INSTR_PROF_SECT_ENTRY #endif @@ -646,17 +700,16 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | \ (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129 -/* FIXME: Please remedy the fixme in the header before bumping the version. */ /* Raw profile format version (start from 1). */ -#define INSTR_PROF_RAW_VERSION 8 +#define INSTR_PROF_RAW_VERSION 10 /* Indexed profile format version (start from 1). */ -#define INSTR_PROF_INDEX_VERSION 10 +#define INSTR_PROF_INDEX_VERSION 12 /* Coverage mapping format version (start from 0). */ -#define INSTR_PROF_COVMAP_VERSION 5 +#define INSTR_PROF_COVMAP_VERSION 6 -/* Profile version is always of type uint64_t. Reserve the upper 8 bits in the - * version for other variants of profile. We set the lowest bit of the upper 8 - * bits (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentation +/* Profile version is always of type uint64_t. Reserve the upper 32 bits in the + * version for other variants of profile. We set the 8th most significant bit + * (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentation * generated profile, and 0 if this is a Clang FE generated profile. * 1 in bit 57 indicates there are context-sensitive records in the profile. 
* The 59th bit indicates whether to use debug info to correlate profiles. @@ -665,7 +718,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, * The 62nd bit indicates whether memory profile information is present. * The 63rd bit indicates if this is a temporal profile. */ -#define VARIANT_MASKS_ALL 0xff00000000000000ULL +#define VARIANT_MASKS_ALL 0xffffffff00000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) #define VARIANT_MASK_IR_PROF (0x1ULL << 56) #define VARIANT_MASK_CSIR_PROF (0x1ULL << 57) @@ -688,22 +741,35 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, than WIN32 */ #define INSTR_PROF_DATA_COMMON __llvm_prf_data #define INSTR_PROF_NAME_COMMON __llvm_prf_names +#define INSTR_PROF_VNAME_COMMON __llvm_prf_vns #define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts +#define INSTR_PROF_BITS_COMMON __llvm_prf_bits #define INSTR_PROF_VALS_COMMON __llvm_prf_vals #define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds +#define INSTR_PROF_VTAB_COMMON __llvm_prf_vtab #define INSTR_PROF_COVMAP_COMMON __llvm_covmap #define INSTR_PROF_COVFUN_COMMON __llvm_covfun +#define INSTR_PROF_COVDATA_COMMON __llvm_covdata +#define INSTR_PROF_COVNAME_COMMON __llvm_covnames #define INSTR_PROF_ORDERFILE_COMMON __llvm_orderfile /* Windows section names. Because these section names contain dollar characters, * they must be quoted. */ #define INSTR_PROF_DATA_COFF ".lprfd$M" #define INSTR_PROF_NAME_COFF ".lprfn$M" +#define INSTR_PROF_VNAME_COFF ".lprfvn$M" #define INSTR_PROF_CNTS_COFF ".lprfc$M" +#define INSTR_PROF_BITS_COFF ".lprfb$M" #define INSTR_PROF_VALS_COFF ".lprfv$M" #define INSTR_PROF_VNODES_COFF ".lprfnd$M" +#define INSTR_PROF_VTAB_COFF ".lprfvt$M" #define INSTR_PROF_COVMAP_COFF ".lcovmap$M" #define INSTR_PROF_COVFUN_COFF ".lcovfun$M" +/* Since cov data and cov names sections are not allocated, we don't need to + * access them at runtime. 
+ */ +#define INSTR_PROF_COVDATA_COFF ".lcovd" +#define INSTR_PROF_COVNAME_COFF ".lcovn" #define INSTR_PROF_ORDERFILE_COFF ".lorderfile$M" #ifdef _WIN32 @@ -711,6 +777,9 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_DATA_COFF #define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_NAME_COFF #define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_CNTS_COFF +#define INSTR_PROF_BITS_SECT_NAME INSTR_PROF_BITS_COFF +#define INSTR_PROF_VTAB_SECT_NAME INSTR_PROF_VTAB_COFF +#define INSTR_PROF_VNAME_SECT_NAME INSTR_PROF_VNAME_COFF /* Array of pointers. Each pointer points to a list * of value nodes associated with one value site. */ @@ -719,12 +788,17 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_VNODES_COFF #define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_COVMAP_COFF #define INSTR_PROF_COVFUN_SECT_NAME INSTR_PROF_COVFUN_COFF +#define INSTR_PROF_COVDATA_SECT_NAME INSTR_PROF_COVDATA_COFF +#define INSTR_PROF_COVNAME_SECT_NAME INSTR_PROF_COVNAME_COFF #define INSTR_PROF_ORDERFILE_SECT_NAME INSTR_PROF_ORDERFILE_COFF #else /* Runtime section names and name strings. */ #define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_DATA_COMMON) #define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON) #define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON) +#define INSTR_PROF_BITS_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_BITS_COMMON) +#define INSTR_PROF_VTAB_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_VTAB_COMMON) +#define INSTR_PROF_VNAME_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_VNAME_COMMON) /* Array of pointers. Each pointer points to a list * of value nodes associated with one value site. 
*/ @@ -733,6 +807,8 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON) #define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON) #define INSTR_PROF_COVFUN_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_COVFUN_COMMON) +#define INSTR_PROF_COVDATA_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_COVDATA_COMMON) +#define INSTR_PROF_COVNAME_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_COVNAME_COMMON) /* Order file instrumentation. */ #define INSTR_PROF_ORDERFILE_SECT_NAME \ INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_COMMON) diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 80c5284d8a7dde640e246442c9a16fe9ed28055a..206254c569d18c3e8a0793c8ed46d6c16648fb4f 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -123,9 +123,6 @@ public: virtual bool instrEntryBBEnabled() const = 0; - /// Return true if we must provide debug info to create PGO profiles. - virtual bool useDebugInfoCorrelate() const { return false; } - /// Return true if the profile has single byte counters representing coverage. virtual bool hasSingleByteCoverage() const = 0; @@ -202,11 +199,13 @@ public: /// instrprof file. static Expected> create(const Twine &Path, vfs::FileSystem &FS, - const InstrProfCorrelator *Correlator = nullptr); + const InstrProfCorrelator *Correlator = nullptr, + std::function Warn = nullptr); static Expected> create(std::unique_ptr Buffer, - const InstrProfCorrelator *Correlator = nullptr); + const InstrProfCorrelator *Correlator = nullptr, + std::function Warn = nullptr); /// \param Weight for raw profiles use this as the temporal profile trace /// weight @@ -318,35 +317,45 @@ private: /// A list of timestamps paired with a function name reference. std::vector> TemporalProfTimestamps; bool ShouldSwapBytes; - // The value of the version field of the raw profile data header. 
The lower 56 - // bits specifies the format version and the most significant 8 bits specify + // The value of the version field of the raw profile data header. The lower 32 + // bits specify the format version and the most significant 32 bits specify // the variant types of the profile. uint64_t Version; uint64_t CountersDelta; + uint64_t BitmapDelta; uint64_t NamesDelta; const RawInstrProf::ProfileData *Data; const RawInstrProf::ProfileData *DataEnd; + const RawInstrProf::VTableProfileData *VTableBegin = nullptr; + const RawInstrProf::VTableProfileData *VTableEnd = nullptr; const char *CountersStart; const char *CountersEnd; + const char *BitmapStart; + const char *BitmapEnd; const char *NamesStart; const char *NamesEnd; + const char *VNamesStart = nullptr; + const char *VNamesEnd = nullptr; // After value profile is all read, this pointer points to // the header of next profile data (if exists) const uint8_t *ValueDataStart; uint32_t ValueKindLast; uint32_t CurValueDataSize; + std::vector BinaryIds; - /// Total size of binary ids. - uint64_t BinaryIdsSize{0}; - /// Start address of binary id length and data pairs. - const uint8_t *BinaryIdsStart; + std::function Warn; + + /// Maximum counter value 2^56.
+ static const uint64_t MaxCounterValue = (1ULL << 56); public: RawInstrProfReader(std::unique_ptr DataBuffer, - const InstrProfCorrelator *Correlator) + const InstrProfCorrelator *Correlator, + std::function Warn) : DataBuffer(std::move(DataBuffer)), Correlator(dyn_cast_or_null>( - Correlator)) {} + Correlator)), + Warn(Warn) {} RawInstrProfReader(const RawInstrProfReader &) = delete; RawInstrProfReader &operator=(const RawInstrProfReader &) = delete; @@ -370,10 +379,6 @@ public: return (Version & VARIANT_MASK_INSTR_ENTRY) != 0; } - bool useDebugInfoCorrelate() const override { - return (Version & VARIANT_MASK_DBG_CORRELATE) != 0; - } - bool hasSingleByteCoverage() const override { return (Version & VARIANT_MASK_BYTE_COVERAGE) != 0; } @@ -409,7 +414,7 @@ private: Error readHeader(const RawInstrProf::Header &Header); template IntT swap(IntT Int) const { - return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int; + return ShouldSwapBytes ? llvm::byteswap(Int) : Int; } support::endianness getDataEndianness() const { @@ -429,6 +434,7 @@ private: Error readName(NamedInstrProfRecord &Record); Error readFuncHash(NamedInstrProfRecord &Record); Error readRawCounts(InstrProfRecord &Record); + Error readRawBitmapBytes(InstrProfRecord &Record); Error readValueProfilingData(InstrProfRecord &Record); bool atEnd() const { return Data == DataEnd; } @@ -441,6 +447,7 @@ private: // As we advance to the next record, we maintain the correct CountersDelta // with respect to the next record. CountersDelta -= sizeof(*Data); + BitmapDelta -= sizeof(*Data); } Data++; ValueDataStart += CurValueDataSize; @@ -618,6 +625,12 @@ public: InstrProfKind getProfileKind() const override; Error populateSymtab(InstrProfSymtab &Symtab) override { + // FIXME: the create method calls 'finalizeSymtab' and sorts a bunch of + // arrays/maps. Since there are other data sources other than 'HashTable' to + // populate a symtab, it might make sense to have something like this + // 1. 
Let each data source populate Symtab and init the arrays/maps without + // calling 'finalizeSymtab' + // 2. Call 'finalizeSymtab' once to get all arrays/maps sorted if needed. return Symtab.create(HashTable->keys()); } }; @@ -652,6 +665,15 @@ private: std::unique_ptr MemProfRecordTable; /// MemProf frame profile data on-disk indexed via frame id. std::unique_ptr MemProfFrameTable; + /// VTableNamePtr points to the beginning of compressed vtable names. + /// When a symtab is constructed from profiles by llvm-profdata, the list of + /// names could be decompressed based on `VTableNamePtr` and + /// `CompressedVTableNamesLen`. + /// A compiler that reads indexed profiles could construct symtab from module + /// IR so it doesn't need the decompressed names. + const char *VTableNamePtr = nullptr; + /// The length of compressed vtable names. + uint64_t CompressedVTableNamesLen = 0; /// Total size of binary ids. uint64_t BinaryIdsSize{0}; /// Start address of binary id length and data pairs. @@ -716,9 +738,12 @@ public: /// When return a hash_mismatch error and MismatchedFuncSum is not nullptr, /// the sum of all counters in the mismatched function will be set to /// MismatchedFuncSum. If there are multiple instances of mismatched - /// functions, MismatchedFuncSum returns the maximum. + /// functions, MismatchedFuncSum returns the maximum. If \c FuncName is not + /// found, try to lookup \c DeprecatedFuncName to handle profiles built by + /// older compilers. Expected getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, + StringRef DeprecatedFuncName = "", uint64_t *MismatchedFuncSum = nullptr); /// Return the memprof record for the function identified by @@ -729,6 +754,10 @@ public: Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, std::vector &Counts); + /// Fill Bitmap Bytes with the profile data for the given function name. 
+ Error getFunctionBitmap(StringRef FuncName, uint64_t FuncHash, + BitVector &Bitmap); + /// Return the maximum of all known function counts. /// \c UseCS indicates whether to use the context-sensitive count. uint64_t getMaximumFunctionCount(bool UseCS) { diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index e50705ee053eea8bd1e6845032fdc2c525b410eb..b76e57faa53758f00342e1b120c5fc07ae6802e9 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -64,15 +64,26 @@ private: // List of binary ids. std::vector BinaryIds; + // Read the vtable names from raw instr profile reader. + StringSet<> VTableNames; + // An enum describing the attributes of the profile. InstrProfKind ProfileKind = InstrProfKind::Unknown; // Use raw pointer here for the incomplete type object. InstrProfRecordWriterTrait *InfoObj; + // Temporary support for writing the previous version of the format, to enable + // some forward compatibility. Currently this suppresses the writing of the + // new vtable names section and header fields. + // TODO: Consider enabling this with future version changes as well, to ease + // deployment of newer versions of llvm-profdata. + bool WritePrevVersion = false; + public: InstrProfWriter(bool Sparse = false, uint64_t TemporalProfTraceReservoirSize = 0, - uint64_t MaxTemporalProfTraceLength = 0); + uint64_t MaxTemporalProfTraceLength = 0, + bool WritePrevVersion = false); ~InstrProfWriter(); StringMap &getProfileData() { return FunctionData; } @@ -85,6 +96,7 @@ public: void addRecord(NamedInstrProfRecord &&I, function_ref Warn) { addRecord(std::move(I), 1, Warn); } + void addVTableName(StringRef VTableName) { VTableNames.insert(VTableName); } /// Add \p SrcTraces using reservoir sampling where \p SrcStreamSize is the /// total number of temporal profiling traces the source has seen. 
diff --git a/llvm/include/llvm/Support/Debug.h b/llvm/include/llvm/Support/Debug.h index 5788ab3b21380dbaeaeaa7a37b7348e28b30d4c5..3e2f0d9b43fc0d4b4fee2317da035356f80e1001 100644 --- a/llvm/include/llvm/Support/Debug.h +++ b/llvm/include/llvm/Support/Debug.h @@ -53,7 +53,7 @@ void setCurrentDebugType(const char *Type); void setCurrentDebugTypes(const char **Types, unsigned Count); /// DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug -/// information. In the '-debug' option is specified on the commandline, and if +/// information. If the '-debug' option is specified on the commandline, and if /// this is a debug build, then the code specified as the option to the macro /// will be executed. Otherwise it will not be. Example: /// @@ -92,7 +92,7 @@ extern bool EnableDebugBuffering; raw_ostream &dbgs(); // DEBUG macro - This macro should be used by passes to emit debug information. -// In the '-debug' option is specified on the commandline, and if this is a +// If the '-debug' option is specified on the commandline, and if this is a // debug build, then the code specified as the option to the macro will be // executed. Otherwise it will not be. Example: // diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h index cb0c055dcb74ae8cf24ec88391fad58389a124e7..8399f894acd7d9658e9d249053802c5f5d8bc153 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -50,18 +50,25 @@ private: uint32_t NumValueSites[IPVK_Last + 1]; GlobalVariable *RegionCounters = nullptr; GlobalVariable *DataVar = nullptr; + GlobalVariable *RegionBitmaps = nullptr; PerFunctionProfileData() { memset(NumValueSites, 0, sizeof(uint32_t) * (IPVK_Last + 1)); } }; DenseMap ProfileDataMap; + // Key is virtual table variable, value is 'VTableProfData' in the form of + // GlobalVariable. 
+ DenseMap VTableDataMap; /// If runtime relocation is enabled, this maps functions to the load /// instruction that produces the profile relocation bias. DenseMap FunctionToProfileBiasMap; std::vector CompilerUsedVars; std::vector UsedVars; std::vector ReferencedNames; + // The list of virtual table variables of which the VTableProfData is + // collected. + std::vector ReferencedVTables; GlobalVariable *NamesVar; size_t NamesSize; @@ -105,23 +112,68 @@ private: /// Force emitting of name vars for unused functions. void lowerCoverageData(GlobalVariable *CoverageNamesVar); + /// Replace instrprof.mcdc.tvbitmask.update with a shift and or instruction + /// using the index represented by a temp value into a bitmap. + void lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate *Ins); + + /// Replace instrprof.mcdc.temp.update with a shift and or instruction using + /// the corresponding condition ID. + void lowerMCDCCondBitmapUpdate(InstrProfMCDCCondBitmapUpdate *Ins); + /// Compute the address of the counter value that this profiling instruction /// acts on. - Value *getCounterAddress(InstrProfInstBase *I); + Value *getCounterAddress(InstrProfCntrInstBase *I); /// Get the region counters for an increment, creating them if necessary. /// /// If the counter array doesn't yet exist, the profile data variables /// referring to them will also be created. - GlobalVariable *getOrCreateRegionCounters(InstrProfInstBase *Inc); + GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc); /// Create the region counters. - GlobalVariable *createRegionCounters(InstrProfInstBase *Inc, StringRef Name, + GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc, + StringRef Name, GlobalValue::LinkageTypes Linkage); + /// Compute the address of the test vector bitmap that this profiling + /// instruction acts on. + Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I); + + /// Get the region bitmaps for an increment, creating them if necessary.
+ /// + /// If the bitmap array doesn't yet exist, the profile data variables + /// referring to them will also be created. + GlobalVariable *getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc); + + /// Create the MC/DC bitmap as a byte-aligned array of bytes associated with + /// an MC/DC Decision region. The number of bytes required is indicated by + /// the intrinsic used (type InstrProfMCDCBitmapInstBase). This is called + /// as part of setupProfileSection() and is conceptually very similar to + /// what is done for profile data counters in createRegionCounters(). + GlobalVariable *createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc, + StringRef Name, + GlobalValue::LinkageTypes Linkage); + + /// Set Comdat property of GV, if required. + void maybeSetComdat(GlobalVariable *GV, GlobalObject *GO, StringRef VarName); + + /// Setup the sections into which counters and bitmaps are allocated. + GlobalVariable *setupProfileSection(InstrProfInstBase *Inc, + InstrProfSectKind IPSK); + + /// Create INSTR_PROF_DATA variable for counters and bitmaps. + void createDataVariable(InstrProfCntrInstBase *Inc, + InstrProfMCDCBitmapParameters *Update); + + /// Get the counters for virtual table values, creating them if necessary. + void getOrCreateVTableProfData(GlobalVariable *GV); + /// Emit the section with compressed function names. void emitNameData(); + /// Emit the section with compressed vtable names. + void emitVTableNames(); + /// Emit value nodes section for value profiling. 
void emitVNodes(); diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h index c77d3214ed010d757e65be21704d452b707c1ede..6d2ad3d757442b40666b1c02536f44c558c8bc01 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h @@ -23,6 +23,8 @@ namespace llvm { +extern cl::opt DebugInfoCorrelate; + class Function; class Instruction; class Module; diff --git a/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h b/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h index fcb384ec361339d06f7a41e21224b7776161ba3b..385831f457038d4e6e792d669faa66a16832fb03 100644 --- a/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h +++ b/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h @@ -15,9 +15,12 @@ #define LLVM_TRANSFORMS_UTILS_CALLPROMOTIONUTILS_H namespace llvm { +template class ArrayRef; +class Constant; class CallBase; class CastInst; class Function; +class Instruction; class MDNode; class Value; @@ -41,7 +44,9 @@ bool isLegalToPromote(const CallBase &CB, Function *Callee, CallBase &promoteCall(CallBase &CB, Function *Callee, CastInst **RetBitCast = nullptr); -/// Promote the given indirect call site to conditionally call \p Callee. +/// Promote the given indirect call site to conditionally call \p Callee. The +/// promoted direct call instruction is predicated on `CB.getCalledOperand() == +/// Callee`. /// /// This function creates an if-then-else structure at the location of the call /// site. The original call site is moved into the "else" block. 
A clone of the @@ -51,6 +56,22 @@ CallBase &promoteCall(CallBase &CB, Function *Callee, CallBase &promoteCallWithIfThenElse(CallBase &CB, Function *Callee, MDNode *BranchWeights = nullptr); +/// This is similar to `promoteCallWithIfThenElse` except that the condition to +/// promote a virtual call is that \p VPtr is the same as any of \p +/// AddressPoints. +/// +/// This function is expected to be used on virtual calls (a subset of indirect +/// calls). \p VPtr is the virtual table address stored in the objects, and +/// \p AddressPoints contains vtable address points. A vtable address point is +/// a location inside the vtable that's referenced by vpointer in C++ objects. +/// +/// TODO: sink the address-calculation instructions of indirect callee to the +/// indirect call fallback after transformation. +CallBase &promoteCallWithVTableCmp(CallBase &CB, Instruction *VPtr, + Function *Callee, + ArrayRef AddressPoints, + MDNode *BranchWeights); + /// Try to promote (devirtualize) a virtual call on an Alloca. Return true on /// success. /// @@ -76,11 +97,11 @@ bool tryPromoteCall(CallBase &CB); /// Predicate and clone the given call site. /// -/// This function creates an if-then-else structure at the location of the call -/// site. The "if" condition compares the call site's called value to the given -/// callee. The original call site is moved into the "else" block, and a clone -/// of the call site is placed in the "then" block. The cloned instruction is -/// returned. +/// This function creates an if-then-else structure at the location of the +/// call site. The "if" condition compares the call site's called value to +/// the given callee. The original call site is moved into the "else" block, +/// and a clone of the call site is placed in the "then" block. The cloned +/// instruction is returned. 
CallBase &versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights); } // end namespace llvm diff --git a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp index ebfa1c8fc08e1c630e0f5cb91dc6280329e64a4e..c4f896311a2d2d34b81d14708902e0e789623da6 100644 --- a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -45,6 +45,10 @@ static cl::opt cl::desc("Max number of promotions for a single indirect " "call callsite")); +cl::opt MaxNumVTableAnnotations( + "icp-max-num-vtables", cl::init(6), cl::Hidden, + cl::desc("Max number of vtables annotated for a vtable load instruction.")); + ICallPromotionAnalysis::ICallPromotionAnalysis() { ValueDataArray = std::make_unique(MaxNumPromotions); } @@ -83,17 +87,17 @@ uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates( return I; } -ArrayRef +MutableArrayRef ICallPromotionAnalysis::getPromotionCandidatesForInstruction( - const Instruction *I, uint32_t &NumVals, uint64_t &TotalCount, - uint32_t &NumCandidates) { - bool Res = + const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates) { + uint32_t NumVals; + auto Res = getValueProfDataFromInst(*I, IPVK_IndirectCallTarget, MaxNumPromotions, ValueDataArray.get(), NumVals, TotalCount); if (!Res) { NumCandidates = 0; - return ArrayRef(); + return MutableArrayRef(); } NumCandidates = getProfitablePromotionCandidates(I, NumVals, TotalCount); - return ArrayRef(ValueDataArray.get(), NumVals); + return MutableArrayRef(ValueDataArray.get(), NumVals); } diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index 2076ed48ea342aa6fec4b15c7c2d30ea4c5401f7..b8b3215bd134f95f92f4c6e1f842fd45f88b635d 100644 --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -82,6 +82,8 @@ static cl::opt ModuleSummaryDotFile( extern cl::opt 
ScalePartialSampleProfileWorkingSetSize; +extern cl::opt MaxNumVTableAnnotations; + // Walk through the operands of a given User via worklist iteration and populate // the set of GlobalValue references encountered. Invoked either on an // Instruction or a GlobalVariable (which walks its initializer). @@ -124,6 +126,24 @@ static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser, Worklist.push_back(Operand); } } + + const Instruction *I = dyn_cast(CurUser); + if (I) { + uint32_t ActualNumValueData = 0; + uint64_t TotalCount = 0; + // MaxNumVTableAnnotations is the maximum number of vtables annotated on + // the instruction. + auto ValueDataArray = + getValueProfDataFromInst(*I, IPVK_VTableTarget, MaxNumVTableAnnotations, + ActualNumValueData, TotalCount); + + if (ValueDataArray.get()) { + for (uint32_t j = 0; j < ActualNumValueData; j++) { + RefEdges.insert(Index.getOrInsertValueInfo(/* VTableGUID = */ + ValueDataArray[j].Value)); + } + } + } return HasBlockAddress; } @@ -424,11 +444,11 @@ static void computeFunctionSummary( } } - uint32_t NumVals, NumCandidates; + uint32_t NumCandidates; uint64_t TotalCount; auto CandidateProfileData = - ICallAnalysis.getPromotionCandidatesForInstruction( - &I, NumVals, TotalCount, NumCandidates); + ICallAnalysis.getPromotionCandidatesForInstruction(&I, TotalCount, + NumCandidates); for (const auto &Candidate : CandidateProfileData) CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)] .updateHotness(getHotness(Candidate.Count, PSI)); diff --git a/llvm/lib/Analysis/TypeMetadataUtils.cpp b/llvm/lib/Analysis/TypeMetadataUtils.cpp index bbaee06ed8a552d97d7641dd0396902a721d82ad..b8dcc39e9223c925278d2933e87d717aa1857e90 100644 --- a/llvm/lib/Analysis/TypeMetadataUtils.cpp +++ b/llvm/lib/Analysis/TypeMetadataUtils.cpp @@ -201,6 +201,26 @@ Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M, return nullptr; } +std::pair +llvm::getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset, + 
Module &M) { + Constant *Ptr = getPointerAtOffset(GV->getInitializer(), Offset, M, GV); + if (!Ptr) + return std::pair(nullptr, nullptr); + + auto C = Ptr->stripPointerCasts(); + // Make sure this is a function or alias to a function. + auto Fn = dyn_cast(C); + auto A = dyn_cast(C); + if (!Fn && A) + Fn = dyn_cast(A->getAliasee()); + + if (!Fn) + return std::pair(nullptr, nullptr); + + return std::pair(Fn, C); +} + void llvm::replaceRelativePointerUsersWithZero(Function *F) { for (auto *U : F->users()) { auto *PtrExpr = dyn_cast(U); diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index d5bcd327a9b7d6d079d4296fcd63a96e13fbca61..140a1624c6d594ffd9343717d9e1e71ee0c2113c 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -198,7 +198,7 @@ public: for (const auto &GUIDSummaryLists : *Index) // Examine all summaries for this GUID. for (auto &Summary : GUIDSummaryLists.second.SummaryList) - if (auto FS = dyn_cast(Summary.get())) + if (auto FS = dyn_cast(Summary.get())) { // For each call in the function summary, see if the call // is to a GUID (which means it is for an indirect call, // otherwise we would have a Value for it). If so, synthesize @@ -206,6 +206,15 @@ public: for (auto &CallEdge : FS->calls()) if (!CallEdge.first.haveGVs() || !CallEdge.first.getValue()) assignValueId(CallEdge.first.getGUID()); + + // For each referenced variables in the function summary, see if the + // variable is represented by a GUID (as opposed to a symbol to + // declarations or definitions in the module). If so, synthesize a + // value id. 
+ for (auto &RefEdge : FS->refs()) + if (!RefEdge.haveGVs() || !RefEdge.getValue()) + assignValueId(RefEdge.getGUID()); + } } protected: @@ -3996,7 +4005,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( NameVals.push_back(SpecialRefCnts.second); // worefcnt for (auto &RI : FS->refs()) - NameVals.push_back(VE.getValueID(RI.getValue())); + NameVals.push_back(getValueId(RI)); bool HasProfileData = F.hasProfileData() || ForceSummaryEdgesCold != FunctionSummary::FSHT_None; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 20c37eb4cb11d518a9cc92cf99d2af12fcb22468..6e11bd28a683df077ebe622ca289193ea351ec3f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7153,6 +7153,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, llvm_unreachable("instrprof failed to lower a timestamp"); case Intrinsic::instrprof_value_profile: llvm_unreachable("instrprof failed to lower a value profiling call"); + case Intrinsic::instrprof_mcdc_parameters: + llvm_unreachable("instrprof failed to lower mcdc parameters"); + case Intrinsic::instrprof_mcdc_tvbitmap_update: + llvm_unreachable("instrprof failed to lower an mcdc tvbitmap update"); + case Intrinsic::instrprof_mcdc_condbitmap_update: + llvm_unreachable("instrprof failed to lower an mcdc condbitmap update"); case Intrinsic::localescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index c336c933ffa80813b888907f221d64aeb23021ec..ccff862a6a4b7fe3500d37c96b8060d49dcfa82d 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -479,6 +479,10 @@ static SectionKind getELFKindForNamedSection(StringRef Name, 
SectionKind K) { /*AddSegmentInfo=*/false) || Name == getInstrProfSectionName(IPSK_covfun, Triple::ELF, /*AddSegmentInfo=*/false) || + Name == getInstrProfSectionName(IPSK_covdata, Triple::ELF, + /*AddSegmentInfo=*/false) || + Name == getInstrProfSectionName(IPSK_covname, Triple::ELF, + /*AddSegmentInfo=*/false) || Name == ".llvmbc" || Name == ".llvmcmd") return SectionKind::getMetadata(); diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index e614285df07ae9a2e5176aac53a29d31b25c3322..918495bdae4684bb4faefc46cee41fd6c3fff083 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -1128,6 +1128,18 @@ LandingPadInst *InvokeInst::getLandingPadInst() const { return cast(getUnwindDest()->getFirstNonPHI()); } +void InvokeInst::updateProfWeight(uint64_t S, uint64_t T) { + if (T == 0) { + LLVM_DEBUG(dbgs() << "Attempting to update profile weights will result in " + "div by 0. Ignoring. Likely the function " + << getParent()->getParent()->getName() + << " has 0 entry count, and contains call instruction " + "with non-zero prof info."); + return; + } + scaleProfData(*this, S, T); +} + //===----------------------------------------------------------------------===// // CallBrInst Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 36d56699c64e931d4fd75e0fff60d0098d727e7d..4270247ef0f590d6a7d32c4096afbb22a9c7ac84 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -270,13 +270,13 @@ int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef NameTable, return -1; } -ConstantInt *InstrProfInstBase::getNumCounters() const { +ConstantInt *InstrProfCntrInstBase::getNumCounters() const { if (InstrProfValueProfileInst::classof(this)) llvm_unreachable("InstrProfValueProfileInst does not have counters!"); return cast(const_cast(getArgOperand(2))); } -ConstantInt 
*InstrProfInstBase::getIndex() const { +ConstantInt *InstrProfCntrInstBase::getIndex() const { if (InstrProfValueProfileInst::classof(this)) llvm_unreachable("Please use InstrProfValueProfileInst::getIndex()"); return cast(const_cast(getArgOperand(3))); diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp index 2490b3012bdc2b4b25026a67d18a2c7e69ac213f..7581ebecdaaf323ed13c0be73f9a9368e5422c65 100644 --- a/llvm/lib/IR/MDBuilder.cpp +++ b/llvm/lib/IR/MDBuilder.cpp @@ -35,19 +35,23 @@ MDNode *MDBuilder::createFPMath(float Accuracy) { } MDNode *MDBuilder::createBranchWeights(uint32_t TrueWeight, - uint32_t FalseWeight) { - return createBranchWeights({TrueWeight, FalseWeight}); + uint32_t FalseWeight, bool IsExpected) { + return createBranchWeights({TrueWeight, FalseWeight}, IsExpected); } -MDNode *MDBuilder::createBranchWeights(ArrayRef Weights) { +MDNode *MDBuilder::createBranchWeights(ArrayRef Weights, + bool IsExpected) { assert(Weights.size() >= 1 && "Need at least one branch weights!"); - SmallVector Vals(Weights.size() + 1); + unsigned int Offset = IsExpected ? 
2 : 1; + SmallVector Vals(Weights.size() + Offset); Vals[0] = createString("branch_weights"); + if (IsExpected) + Vals[1] = createString("expected"); Type *Int32Ty = Type::getInt32Ty(Context); for (unsigned i = 0, e = Weights.size(); i != e; ++i) - Vals[i + 1] = createConstant(ConstantInt::get(Int32Ty, Weights[i])); + Vals[i + Offset] = createConstant(ConstantInt::get(Int32Ty, Weights[i])); return MDNode::get(Context, Vals); } diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index e534368b05e41643607f5ca28c1115a5c5cd3998..5d44b06058232db1881ea5e3efb9e80bb5980deb 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" @@ -45,25 +46,8 @@ constexpr unsigned WeightsIdx = 1; // the minimum number of operands for MD_prof nodes with branch weights constexpr unsigned MinBWOps = 3; -bool extractWeights(const MDNode *ProfileData, - SmallVectorImpl &Weights) { - // Assume preconditions are already met (i.e. 
this is valid metadata) - assert(ProfileData && "ProfileData was nullptr in extractWeights"); - unsigned NOps = ProfileData->getNumOperands(); - - assert(WeightsIdx < NOps && "Weights Index must be less than NOps."); - Weights.resize(NOps - WeightsIdx); - - for (unsigned Idx = WeightsIdx, E = NOps; Idx != E; ++Idx) { - ConstantInt *Weight = - mdconst::dyn_extract(ProfileData->getOperand(Idx)); - assert(Weight && "Malformed branch_weight in MD_prof node"); - assert(Weight->getValue().getActiveBits() <= 32 && - "Too many bits for uint32_t"); - Weights[Idx - WeightsIdx] = Weight->getZExtValue(); - } - return true; -} +// the minimum number of operands for MD_prof nodes with value profiles +constexpr unsigned MinVPOps = 5; // We may want to add support for other MD_prof types, so provide an abstraction // for checking the metadata type. @@ -84,6 +68,27 @@ bool isTargetMD(const MDNode *ProfData, const char *Name, unsigned MinOps) { return ProfDataName->getString().equals(Name); } +template >> +static void extractFromBranchWeightMD(const MDNode *ProfileData, + SmallVectorImpl &Weights) { + assert(isBranchWeightMD(ProfileData) && "wrong metadata"); + + unsigned NOps = ProfileData->getNumOperands(); + unsigned WeightsIdx = getBranchWeightOffset(ProfileData); + assert(WeightsIdx < NOps && "Weights Index must be less than NOps."); + Weights.resize(NOps - WeightsIdx); + + for (unsigned Idx = WeightsIdx, E = NOps; Idx != E; ++Idx) { + ConstantInt *Weight = + mdconst::dyn_extract(ProfileData->getOperand(Idx)); + assert(Weight && "Malformed branch_weight in MD_prof node"); + assert(Weight->getValue().getActiveBits() <= (sizeof(T) * 8) && + "Too many bits for MD_prof branch_weight"); + Weights[Idx - WeightsIdx] = Weight->getZExtValue(); + } +} + } // namespace namespace llvm { @@ -96,15 +101,53 @@ bool isBranchWeightMD(const MDNode *ProfileData) { return isTargetMD(ProfileData, "branch_weights", MinBWOps); } +bool isValueProfileMD(const MDNode *ProfileData) { + return 
isTargetMD(ProfileData, "VP", MinVPOps); +} + bool hasBranchWeightMD(const Instruction &I) { auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); return isBranchWeightMD(ProfileData); } +bool hasCountTypeMD(const Instruction &I) { + auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); + // Value profiles record count-type information. + if (isValueProfileMD(ProfileData)) + return true; + // Conservatively assume non CallBase instruction only get taken/not-taken + // branch probability, so not interpret them as count. + return isa(I) && !isBranchWeightMD(ProfileData); +} + bool hasValidBranchWeightMD(const Instruction &I) { return getValidBranchWeightMDNode(I); } +bool hasBranchWeightOrigin(const Instruction &I) { + auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); + return hasBranchWeightOrigin(ProfileData); +} + +bool hasBranchWeightOrigin(const MDNode *ProfileData) { + if (!isBranchWeightMD(ProfileData)) + return false; + auto *ProfDataName = dyn_cast(ProfileData->getOperand(1)); + // NOTE: if we ever have more types of branch weight provenance, + // we need to check the string value is "expected". For now, we + // supply a more generic API, and avoid the spurious comparisons. + assert(ProfDataName == nullptr || ProfDataName->getString() == "expected"); + return ProfDataName != nullptr; +} + +unsigned getBranchWeightOffset(const MDNode *ProfileData) { + return hasBranchWeightOrigin(ProfileData) ? 
2 : 1; +} + +unsigned getNumBranchWeights(const MDNode &ProfileData) { + return ProfileData.getNumOperands() - getBranchWeightOffset(&ProfileData); +} + MDNode *getBranchWeightMDNode(const Instruction &I) { auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); if (!isBranchWeightMD(ProfileData)) @@ -119,11 +162,22 @@ MDNode *getValidBranchWeightMDNode(const Instruction &I) { return nullptr; } +void extractFromBranchWeightMD32(const MDNode *ProfileData, + SmallVectorImpl &Weights) { + extractFromBranchWeightMD(ProfileData, Weights); +} + +void extractFromBranchWeightMD64(const MDNode *ProfileData, + SmallVectorImpl &Weights) { + extractFromBranchWeightMD(ProfileData, Weights); +} + bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl &Weights) { if (!isBranchWeightMD(ProfileData)) return false; - return extractWeights(ProfileData, Weights); + extractFromBranchWeightMD(ProfileData, Weights); + return true; } bool extractBranchWeights(const Instruction &I, @@ -184,4 +238,63 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) { return extractProfTotalWeight(I.getMetadata(LLVMContext::MD_prof), TotalVal); } +void setBranchWeights(Instruction &I, ArrayRef Weights, + bool IsExpected) { + MDBuilder MDB(I.getContext()); + MDNode *BranchWeights = MDB.createBranchWeights(Weights, IsExpected); + I.setMetadata(LLVMContext::MD_prof, BranchWeights); +} +void scaleProfData(Instruction &I, uint64_t S, uint64_t T) { + assert(T != 0 && "Caller should guarantee"); + auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); + if (ProfileData == nullptr) + return; + + auto *ProfDataName = dyn_cast(ProfileData->getOperand(0)); + if (!ProfDataName || (ProfDataName->getString() != "branch_weights" && + ProfDataName->getString() != "VP")) + return; + + if (!hasCountTypeMD(I)) + return; + + LLVMContext &C = I.getContext(); + + MDBuilder MDB(C); + SmallVector Vals; + Vals.push_back(ProfileData->getOperand(0)); + APInt APS(128, S), APT(128, T); + if 
(ProfDataName->getString() == "branch_weights" && + ProfileData->getNumOperands() > 0) { + // Using APInt::div may be expensive, but most cases should fit 64 bits. + APInt Val(128, + mdconst::dyn_extract( + ProfileData->getOperand(getBranchWeightOffset(ProfileData))) + ->getValue() + .getZExtValue()); + Val *= APS; + Vals.push_back(MDB.createConstant(ConstantInt::get( + Type::getInt32Ty(C), Val.udiv(APT).getLimitedValue(UINT32_MAX)))); + } else if (ProfDataName->getString() == "VP") + for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) { + // The first value is the key of the value profile, which will not change. + Vals.push_back(ProfileData->getOperand(i)); + uint64_t Count = + mdconst::dyn_extract(ProfileData->getOperand(i + 1)) + ->getValue() + .getZExtValue(); + // Don't scale the magic number. + if (Count == NOMORE_ICP_MAGICNUM) { + Vals.push_back(ProfileData->getOperand(i + 1)); + continue; + } + // Using APInt::div may be expensive, but most cases should fit 64 bits. + APInt Val(128, Count); + Val *= APS; + Vals.push_back(MDB.createConstant(ConstantInt::get( + Type::getInt64Ty(C), Val.udiv(APT).getLimitedValue()))); + } + I.setMetadata(LLVMContext::MD_prof, MDNode::get(C, Vals)); +} + } // namespace llvm diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp index 05737323314a8ad3318565d4685ae71306aeae97..b84f20cc0d413b4de0fcb46ace0994fa432c4b24 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -454,9 +454,13 @@ Error InstrProfSymtab::create(SectionRef &Section) { // If this is a linked PE/COFF file, then we have to skip over the null byte // that is allocated in the .lprfn$A section in the LLVM profiling runtime. + // If the name section is .lprfcovnames, it doesn't have the null byte at the + // beginning. 
const ObjectFile *Obj = Section.getObject(); if (isa(Obj) && !Obj->isRelocatableObject()) - Data = Data.drop_front(1); + if (Expected NameOrErr = Section.getName()) + if (*NameOrErr != getInstrProfSectionName(IPSK_covname, Triple::COFF)) + Data = Data.drop_front(1); return Error::success(); } @@ -757,6 +761,7 @@ Expected> CovMapFuncRecordReader::get( case CovMapVersion::Version4: case CovMapVersion::Version5: case CovMapVersion::Version6: + case CovMapVersion::Version7: // Decompress the name data. if (Error E = P.create(P.getNameData())) return std::move(E); @@ -775,6 +780,9 @@ Expected> CovMapFuncRecordReader::get( else if (Version == CovMapVersion::Version6) return std::make_unique>(P, R, D, F); + else if (Version == CovMapVersion::Version7) + return std::make_unique>(P, R, D, F); } llvm_unreachable("Unsupported version"); } @@ -827,33 +835,36 @@ static const char *TestingFormatMagic = "llvmcovmtestdata"; Expected> BinaryCoverageReader::createCoverageReaderFromBuffer( StringRef Coverage, FuncRecordsStorage &&FuncRecords, - InstrProfSymtab &&ProfileNames, uint8_t BytesInAddress, + std::unique_ptr ProfileNamesPtr, uint8_t BytesInAddress, support::endianness Endian, StringRef CompilationDir) { - std::unique_ptr Reader( - new BinaryCoverageReader(std::move(FuncRecords))); - Reader->ProfileNames = std::move(ProfileNames); + if (ProfileNamesPtr == nullptr) + return make_error(coveragemap_error::malformed, + "Caller must provide ProfileNames"); + std::unique_ptr Reader(new BinaryCoverageReader( + std::move(ProfileNamesPtr), std::move(FuncRecords))); + InstrProfSymtab &ProfileNames = *Reader->ProfileNames; StringRef FuncRecordsRef = Reader->FuncRecords->getBuffer(); if (BytesInAddress == 4 && Endian == support::endianness::little) { if (Error E = readCoverageMappingData( - Reader->ProfileNames, Coverage, FuncRecordsRef, - Reader->MappingRecords, CompilationDir, Reader->Filenames)) + ProfileNames, Coverage, FuncRecordsRef, Reader->MappingRecords, + CompilationDir, 
Reader->Filenames)) return std::move(E); } else if (BytesInAddress == 4 && Endian == support::endianness::big) { if (Error E = readCoverageMappingData( - Reader->ProfileNames, Coverage, FuncRecordsRef, - Reader->MappingRecords, CompilationDir, Reader->Filenames)) + ProfileNames, Coverage, FuncRecordsRef, Reader->MappingRecords, + CompilationDir, Reader->Filenames)) return std::move(E); } else if (BytesInAddress == 8 && Endian == support::endianness::little) { if (Error E = readCoverageMappingData( - Reader->ProfileNames, Coverage, FuncRecordsRef, - Reader->MappingRecords, CompilationDir, Reader->Filenames)) + ProfileNames, Coverage, FuncRecordsRef, Reader->MappingRecords, + CompilationDir, Reader->Filenames)) return std::move(E); } else if (BytesInAddress == 8 && Endian == support::endianness::big) { if (Error E = readCoverageMappingData( - Reader->ProfileNames, Coverage, FuncRecordsRef, - Reader->MappingRecords, CompilationDir, Reader->Filenames)) + ProfileNames, Coverage, FuncRecordsRef, Reader->MappingRecords, + CompilationDir, Reader->Filenames)) return std::move(E); } else return make_error(coveragemap_error::malformed); @@ -882,8 +893,8 @@ loadTestingFormat(StringRef Data, StringRef CompilationDir) { Data = Data.substr(N); if (Data.size() < ProfileNamesSize) return make_error(coveragemap_error::malformed); - InstrProfSymtab ProfileNames; - if (Error E = ProfileNames.create(Data.substr(0, ProfileNamesSize), Address)) + auto ProfileNames = std::make_unique(); + if (Error E = ProfileNames->create(Data.substr(0, ProfileNamesSize), Address)) return std::move(E); Data = Data.substr(ProfileNamesSize); // Skip the padding bytes because coverage map data has an alignment of 8. @@ -926,10 +937,13 @@ loadTestingFormat(StringRef Data, StringRef CompilationDir) { BytesInAddress, Endian, CompilationDir); } -/// Find all sections that match \p Name. There may be more than one if comdats -/// are in use, e.g. for the __llvm_covfun section on ELF. 
-static Expected> lookupSections(ObjectFile &OF, - StringRef Name) { +/// Find all sections that match \p IPSK name. There may be more than one if +/// comdats are in use, e.g. for the __llvm_covfun section on ELF. +static Expected> +lookupSections(ObjectFile &OF, InstrProfSectKind IPSK) { + auto ObjFormat = OF.getTripleObjectFormat(); + auto Name = + getInstrProfSectionName(IPSK, ObjFormat, /*AddSegmentInfo=*/false); // On COFF, the object file section name may end in "$M". This tells the // linker to sort these sections between "$A" and "$Z". The linker removes the // dollar and everything after it in the final binary. Do the same to match. @@ -944,8 +958,13 @@ static Expected> lookupSections(ObjectFile &OF, Expected NameOrErr = Section.getName(); if (!NameOrErr) return NameOrErr.takeError(); - if (stripSuffix(*NameOrErr) == Name) + if (stripSuffix(*NameOrErr) == Name) { + // COFF profile name section contains two null bytes indicating the + // start/end of the section. If its size is 2 bytes, it's empty. + if (IsCOFF && IPSK == IPSK_name && Section.getSize() == 2) + continue; Sections.push_back(Section); + } } if (Sections.empty()) return make_error(coveragemap_error::no_data_found); @@ -981,15 +1000,27 @@ loadBinaryFormat(std::unique_ptr Bin, StringRef Arch, : support::endianness::big; // Look for the sections that we are interested in. - auto ObjFormat = OF->getTripleObjectFormat(); - auto NamesSection = - lookupSections(*OF, getInstrProfSectionName(IPSK_name, ObjFormat, - /*AddSegmentInfo=*/false)); - if (auto E = NamesSection.takeError()) + auto ProfileNames = std::make_unique(); + std::vector NamesSectionRefs; + // If IPSK_name is not found, fall back to IPSK_covname, which is + // used when binary correlation is enabled.
+ auto NamesSection = lookupSections(*OF, IPSK_name); + if (auto E = NamesSection.takeError()) { + consumeError(std::move(E)); + NamesSection = lookupSections(*OF, IPSK_covname); + if (auto E = NamesSection.takeError()) + return std::move(E); + } + NamesSectionRefs = *NamesSection; + + if (NamesSectionRefs.size() != 1) + return make_error( + coveragemap_error::malformed, + "the size of coverage mapping section is not one"); + if (Error E = ProfileNames->create(NamesSectionRefs.back())) return std::move(E); - auto CoverageSection = - lookupSections(*OF, getInstrProfSectionName(IPSK_covmap, ObjFormat, - /*AddSegmentInfo=*/false)); + + auto CoverageSection = lookupSections(*OF, IPSK_covmap); if (auto E = CoverageSection.takeError()) return std::move(E); std::vector CoverageSectionRefs = *CoverageSection; @@ -1000,17 +1031,8 @@ loadBinaryFormat(std::unique_ptr Bin, StringRef Arch, return CoverageMappingOrErr.takeError(); StringRef CoverageMapping = CoverageMappingOrErr.get(); - InstrProfSymtab ProfileNames; - std::vector NamesSectionRefs = *NamesSection; - if (NamesSectionRefs.size() != 1) - return make_error(coveragemap_error::malformed); - if (Error E = ProfileNames.create(NamesSectionRefs.back())) - return std::move(E); - // Look for the coverage records section (Version4 only). 
- auto CoverageRecordsSections = - lookupSections(*OF, getInstrProfSectionName(IPSK_covfun, ObjFormat, - /*AddSegmentInfo=*/false)); + auto CoverageRecordsSections = lookupSections(*OF, IPSK_covfun); BinaryCoverageReader::FuncRecordsStorage FuncRecords; if (auto E = CoverageRecordsSections.takeError()) { diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 0f9c33de3f52296c8b7f18ea4f2377e9ec18b937..6520b62b70b20c6f212f5acab3a1081e988ad15a 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" @@ -35,6 +36,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Compression.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -58,6 +60,8 @@ using namespace llvm; +#define DEBUG_TYPE "instrprof" + static cl::opt StaticFuncFullModulePrefix( "static-func-full-module-prefix", cl::init(true), cl::Hidden, cl::desc("Use full module build paths in the profile counter names for " @@ -112,11 +116,11 @@ static std::string getInstrProfErrString(instrprof_error Err, case instrprof_error::malformed: OS << "malformed instrumentation profile data"; break; - case instrprof_error::missing_debug_info_for_correlation: - OS << "debug info for correlation is required"; + case instrprof_error::missing_correlation_info: + OS << "debug info/binary for correlation is required"; break; - case instrprof_error::unexpected_debug_info_for_correlation: - OS << "debug info for correlation is not necessary"; + case instrprof_error::unexpected_correlation_info: + OS << "debug info/binary for correlation is not necessary"; break; case 
instrprof_error::unable_to_correlate_profile: OS << "unable to correlate profile"; @@ -135,6 +139,9 @@ static std::string getInstrProfErrString(instrprof_error Err, case instrprof_error::count_mismatch: OS << "function basic block count change detected (counter mismatch)"; break; + case instrprof_error::bitmap_mismatch: + OS << "function bitmap size change detected (bitmap size mismatch)"; + break; case instrprof_error::counter_overflow: OS << "counter overflow"; break; @@ -157,6 +164,9 @@ static std::string getInstrProfErrString(instrprof_error Err, case instrprof_error::raw_profile_version_mismatch: OS << "raw profile version mismatch"; break; + case instrprof_error::counter_value_too_large: + OS << "excessively large counter value suggests corrupted profile data"; + break; } // If optional error message is not empty, append it to the message. @@ -214,6 +224,18 @@ cl::opt DoInstrProfNameCompression( "enable-name-compression", cl::desc("Enable name/filename string compression"), cl::init(true)); +cl::opt EnableVTableValueProfiling( + "enable-vtable-value-profiling", cl::init(false), + cl::desc("If true, the virtual table address will be instrumented to know " + "the types of a C++ pointer. The information is used in indirect " + "call promotion to do selective vtable-based comparison.")); + +cl::opt EnableVTableProfileUse( + "enable-vtable-profile-use", cl::init(false), + cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " + "profiles will be used by ICP pass for more efficient indirect " + "call sequence. 
If false, type profiles won't be used.")); + std::string getInstrProfSectionName(InstrProfSectKind IPSK, Triple::ObjectFormatType OF, bool AddSegmentInfo) { @@ -264,6 +286,110 @@ static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) { return PathNameStr.substr(LastPos); } +static StringRef getStrippedSourceFileName(const GlobalObject &GO) { + StringRef FileName(GO.getParent()->getSourceFileName()); + uint32_t StripLevel = StaticFuncFullModulePrefix ? 0 : (uint32_t)-1; + if (StripLevel < StaticFuncStripDirNamePrefix) + StripLevel = StaticFuncStripDirNamePrefix; + if (StripLevel) + FileName = stripDirPrefix(FileName, StripLevel); + return FileName; +} + +// The PGO name has the format [;] where ; is +// provided if linkage is local and is used to discriminate possibly identical +// mangled names. ";" is used because it is unlikely to be found in either +// or . +// +// Older compilers used getPGOFuncName() which has the format +// [:]. This caused trouble for Objective-C functions +// which commonly have :'s in their names. We still need to compute this name to +// lookup functions from profiles built by older compilers. +static std::string +getIRPGONameForGlobalObject(const GlobalObject &GO, + GlobalValue::LinkageTypes Linkage, + StringRef FileName) { + return GlobalValue::getGlobalIdentifier(GO.getName(), Linkage, FileName); +} + +static std::optional lookupPGONameFromMetadata(MDNode *MD) { + if (MD != nullptr) { + StringRef S = cast(MD->getOperand(0))->getString(); + return S.str(); + } + return {}; +} + +// Returns the PGO object name. This function has some special handling +// when called in LTO optimization. The following only applies when calling in +// LTO passes (when \c InLTO is true): LTO's internalization privatizes many +// global linkage symbols. This happens after value profile annotation, but +// those internal linkage functions should not have a source prefix. 
+// Additionally, for ThinLTO mode, exported internal functions are promoted +// and renamed. We need to ensure that the original internal PGO name is +// used when computing the GUID that is compared against the profiled GUIDs. +// To differentiate compiler generated internal symbols from original ones, +// PGOFuncName meta data are created and attached to the original internal +// symbols in the value profile annotation step +// (PGOUseFunc::annotateIndirectCallSites). If a symbol does not have the meta +// data, its original linkage must be non-internal. +static std::string getIRPGOObjectName(const GlobalObject &GO, bool InLTO, + MDNode *PGONameMetadata) { + if (!InLTO) { + auto FileName = getStrippedSourceFileName(GO); + return getIRPGONameForGlobalObject(GO, GO.getLinkage(), FileName); + } + + // In LTO mode (when InLTO is true), first check if there is a meta data. + if (auto IRPGOFuncName = lookupPGONameFromMetadata(PGONameMetadata)) + return *IRPGOFuncName; + + // If there is no meta data, the function must be a global before the value + // profile annotation pass. Its current linkage may be internal if it is + // internalized in LTO mode. + return getIRPGONameForGlobalObject(GO, GlobalValue::ExternalLinkage, ""); +} + +// The PGO name has the format [;] where ; is +// provided if linkage is local and is the mangled function +// name. The filepath is used to discriminate possibly identical function names. +// ; is used because it is unlikely to be found in either or +// . +// +// Older compilers used getPGOFuncName() which has the format +// [:]. is used to discriminate between +// possibly identical function names when linkage is local and +// simply comes from F.getName(). This caused trouble for Objective-C functions +// which commonly have :'s in their names. Also, since is not +// mangled, they cannot be passed to Mach-O linkers via -order_file. We still +// need to compute this name to lookup functions from profiles built by older +// compilers. 
+static std::string getIRPGOFuncName(const Function &F, + GlobalValue::LinkageTypes Linkage, + StringRef FileName) { + SmallString<64> Name; + if (llvm::GlobalValue::isLocalLinkage(Linkage)) { + Name.append(FileName.empty() ? "" : FileName); + Name.append(";"); + } + Mangler().getNameWithPrefix(Name, &F, /*CannotUsePrivateLabel=*/true); + return Name.str().str(); +} + +static std::optional lookupPGOFuncName(const Function &F) { + if (MDNode *MD = getPGOFuncNameMetadata(F)) { + StringRef S = cast(MD->getOperand(0))->getString(); + return S.str(); + } + return {}; +} + +// Returns the IRPGO function name and does special handling when called +// in LTO optimization. See the comments of `getIRPGOObjectName` for details. +std::string getIRPGOFuncName(const Function &F, bool InLTO) { + return getIRPGOObjectName(F, InLTO, getPGOFuncNameMetadata(F)); +} + // Return the PGOFuncName. This function has some special handling when called // in LTO optimization. The following only applies when calling in LTO passes // (when \c InLTO is true): LTO's internalization privatizes many global linkage @@ -279,20 +405,13 @@ static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) { // data, its original linkage must be non-internal. std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) { if (!InLTO) { - StringRef FileName(F.getParent()->getSourceFileName()); - uint32_t StripLevel = StaticFuncFullModulePrefix ? 0 : (uint32_t)-1; - if (StripLevel < StaticFuncStripDirNamePrefix) - StripLevel = StaticFuncStripDirNamePrefix; - if (StripLevel) - FileName = stripDirPrefix(FileName, StripLevel); + auto FileName = getStrippedSourceFileName(F); return getPGOFuncName(F.getName(), F.getLinkage(), FileName, Version); } // In LTO mode (when InLTO is true), first check if there is a meta data. 
- if (MDNode *MD = getPGOFuncNameMetadata(F)) { - StringRef S = cast(MD->getOperand(0))->getString(); - return S.str(); - } + if (auto PGOFuncName = lookupPGOFuncName(F)) + return *PGOFuncName; // If there is no meta data, the function must be a global before the value // profile annotation pass. Its current linkage may be internal if it is @@ -300,6 +419,29 @@ std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) { return getPGOFuncName(F.getName(), GlobalValue::ExternalLinkage, ""); } +std::string getPGOName(const GlobalVariable &V, bool InLTO) { + // PGONameMetadata should be set by compiler at profile use time + // and read by symtab creation to look up symbols corresponding to + // an MD5 hash. + return getIRPGOObjectName(V, InLTO, V.getMetadata(getPGONameMetadataName())); +} + +// See getIRPGOFuncName() for a description of the format. +std::pair getParsedIRPGOFuncName(StringRef IRPGOName) { + auto [FileName, MangledName] = IRPGOName.split(';'); + if (MangledName.empty()) + return std::make_pair(StringRef(), IRPGOName); + return std::make_pair(FileName, MangledName); +} + +// See getIRPGOObjectName() for a description of the format. +std::pair getParsedIRPGOName(StringRef IRPGOName) { + auto [FileName, MangledName] = IRPGOName.split(';'); + if (MangledName.empty()) + return std::make_pair(StringRef(), IRPGOName); + return std::make_pair(FileName, MangledName); +} + StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) { if (FileName.empty()) return PGOFuncName; @@ -320,7 +462,7 @@ std::string getPGOFuncNameVarName(StringRef FuncName, return VarName; // Now fix up illegal chars in local VarName that may upset the assembler. - const char *InvalidChars = "-:<>/\"'"; + const char InvalidChars[] = "-:;<>/\"'"; size_t found = VarName.find_first_of(InvalidChars); while (found != std::string::npos) { VarName[found] = '_'; @@ -366,41 +508,168 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) { // Ignore in this case.
if (!F.hasName()) continue; - const std::string &PGOFuncName = getPGOFuncName(F, InLTO); - if (Error E = addFuncName(PGOFuncName)) + if (Error E = addFuncWithName(F, getIRPGOFuncName(F, InLTO))) + return E; + // Also use getPGOFuncName() so that we can find records from older profiles + if (Error E = addFuncWithName(F, getPGOFuncName(F, InLTO))) + return E; + } + + SmallVector Types; + for (GlobalVariable &G : M.globals()) { + if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type)) + continue; + if (Error E = addVTableWithName(G, getPGOName(G, InLTO))) return E; - MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F); - // In ThinLTO, local function may have been promoted to global and have - // suffix ".llvm." added to the function name. We need to add the - // stripped function name to the symbol table so that we can find a match - // from profile. - // - // We may have other suffixes similar as ".llvm." which are needed to - // be stripped before the matching, but ".__uniq." suffix which is used - // to differentiate internal linkage functions in different modules - // should be kept. Now this is the only suffix with the pattern ".xxx" - // which is kept before matching. - const std::string UniqSuffix = ".__uniq."; - auto pos = PGOFuncName.find(UniqSuffix); - // Search '.' after ".__uniq." if ".__uniq." exists, otherwise - // search '.' from the beginning. 
- if (pos != std::string::npos) - pos += UniqSuffix.length(); - else - pos = 0; - pos = PGOFuncName.find('.', pos); - if (pos != std::string::npos && pos != 0) { - const std::string &OtherFuncName = PGOFuncName.substr(0, pos); - if (Error E = addFuncName(OtherFuncName)) - return E; - MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F); - } } + Sorted = false; finalizeSymtab(); return Error::success(); } +Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable, + StringRef VTablePGOName) { + auto NameToGUIDMap = [&](StringRef Name) -> Error { + if (Error E = addSymbolName(Name)) + return E; + + bool Inserted = true; + std::tie(std::ignore, Inserted) = + MD5VTableMap.try_emplace(GlobalValue::getGUID(Name), &VTable); + return Error::success(); + }; + if (Error E = NameToGUIDMap(VTablePGOName)) + return E; + + StringRef CanonicalName = getCanonicalName(VTablePGOName); + if (CanonicalName != VTablePGOName) + return NameToGUIDMap(CanonicalName); + + return Error::success(); +} + +/// \c NameStrings is a string composed of one or more possibly encoded +/// sub-strings. The substrings are separated by 0 or more zero bytes. This +/// method decodes the string and calls `NameCallback` for each substring. 
+static Error +readAndDecodeStrings(StringRef NameStrings, + std::function NameCallback) { + const uint8_t *P = NameStrings.bytes_begin(); + const uint8_t *EndP = NameStrings.bytes_end(); + while (P < EndP) { + uint32_t N; + uint64_t UncompressedSize = decodeULEB128(P, &N); + P += N; + uint64_t CompressedSize = decodeULEB128(P, &N); + P += N; + const bool IsCompressed = (CompressedSize != 0); + SmallVector UncompressedNameStrings; + StringRef NameStrings; + if (IsCompressed) { + if (!llvm::compression::zlib::isAvailable()) + return make_error(instrprof_error::zlib_unavailable); + + if (Error E = compression::zlib::decompress(ArrayRef(P, CompressedSize), + UncompressedNameStrings, + UncompressedSize)) { + consumeError(std::move(E)); + return make_error(instrprof_error::uncompress_failed); + } + P += CompressedSize; + NameStrings = toStringRef(UncompressedNameStrings); + } else { + NameStrings = + StringRef(reinterpret_cast(P), UncompressedSize); + P += UncompressedSize; + } + // Now parse the name strings. 
+ SmallVector Names; + NameStrings.split(Names, getInstrProfNameSeparator()); + for (StringRef &Name : Names) + if (Error E = NameCallback(Name)) + return E; + + while (P < EndP && *P == 0) + P++; + } + return Error::success(); +} + +Error InstrProfSymtab::create(StringRef NameStrings) { + return readAndDecodeStrings( + NameStrings, + std::bind(&InstrProfSymtab::addFuncName, this, std::placeholders::_1)); +} + +Error InstrProfSymtab::create(StringRef FuncNameStrings, + StringRef VTableNameStrings) { + if (Error E = readAndDecodeStrings(FuncNameStrings, + std::bind(&InstrProfSymtab::addFuncName, + this, std::placeholders::_1))) + return E; + + return readAndDecodeStrings( + VTableNameStrings, + std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1)); +} + +Error InstrProfSymtab::initVTableNamesFromCompressedStrings( + StringRef CompressedVTableStrings) { + return readAndDecodeStrings( + CompressedVTableStrings, + std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1)); +} + +StringRef InstrProfSymtab::getCanonicalName(StringRef PGOName) { + // In ThinLTO, local function may have been promoted to global and have + // suffix ".llvm." added to the function name. We need to add the + // stripped function name to the symbol table so that we can find a match + // from profile. + // + // ".__uniq." suffix is used to differentiate internal linkage functions in + // different modules and should be kept. This is the only suffix with the + // pattern ".xxx" which is kept before matching, other suffixes similar as + // ".llvm." will be stripped. + const std::string UniqSuffix = ".__uniq."; + size_t Pos = PGOName.find(UniqSuffix); + if (Pos != StringRef::npos) + Pos += UniqSuffix.length(); + else + Pos = 0; + + // Search '.' after ".__uniq." if ".__uniq." exists, otherwise search '.' from + // the beginning. 
+ Pos = PGOName.find('.', Pos); + if (Pos != StringRef::npos && Pos != 0) + return PGOName.substr(0, Pos); + + return PGOName; +} + +Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) { + auto NameToGUIDMap = [&](StringRef Name) -> Error { + if (Error E = addFuncName(Name)) + return E; + MD5FuncMap.emplace_back(Function::getGUID(Name), &F); + return Error::success(); + }; + if (Error E = NameToGUIDMap(PGOFuncName)) + return E; + + StringRef CanonicalFuncName = getCanonicalName(PGOFuncName); + if (CanonicalFuncName != PGOFuncName) + return NameToGUIDMap(CanonicalFuncName); + + return Error::success(); +} + +uint64_t InstrProfSymtab::getVTableHashFromAddress(uint64_t Address) { + // Given a runtime address, look up the hash value in the interval map, and + // fallback to value 0 if a hash value is not found. + return VTableAddrMap.lookup(Address, 0); +} + uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) { finalizeSymtab(); auto It = partition_point(AddrToMD5Map, [=](std::pair A) { @@ -422,6 +691,45 @@ void InstrProfSymtab::dumpNames(raw_ostream &OS) const { OS << S << '\n'; } +Error collectGlobalObjectNameStrings(ArrayRef NameStrs, + bool DoCompression, std::string &Result) { + assert(!NameStrs.empty() && "No name data to emit"); + + uint8_t Header[20], *P = Header; + std::string UncompressedNameStrings = + join(NameStrs.begin(), NameStrs.end(), getInstrProfNameSeparator()); + + assert( + StringRef(UncompressedNameStrings).count(getInstrProfNameSeparator()) == + (NameStrs.size() - 1) && + "PGO name is invalid (contains separator token)"); + + unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P); + P += EncLen; + + auto WriteStringToResult = [&](size_t CompressedLen, StringRef InputStr) { + EncLen = encodeULEB128(CompressedLen, P); + P += EncLen; + char *HeaderStr = reinterpret_cast(&Header[0]); + unsigned HeaderLen = P - &Header[0]; + Result.append(HeaderStr, HeaderLen); + Result += InputStr; + return 
Error::success(); + }; + + if (!DoCompression) { + return WriteStringToResult(0, UncompressedNameStrings); + } + + SmallVector CompressedNameStrings; + compression::zlib::compress(arrayRefFromStringRef(UncompressedNameStrings), + CompressedNameStrings, + compression::zlib::BestSizeCompression); + + return WriteStringToResult(CompressedNameStrings.size(), + toStringRef(CompressedNameStrings)); +} + Error collectPGOFuncNameStrings(ArrayRef NameStrs, bool doCompression, std::string &Result) { assert(!NameStrs.empty() && "No name data to emit"); @@ -473,50 +781,18 @@ Error collectPGOFuncNameStrings(ArrayRef NameVars, for (auto *NameVar : NameVars) { NameStrs.push_back(std::string(getPGOFuncNameVarInitializer(NameVar))); } - return collectPGOFuncNameStrings( + return collectGlobalObjectNameStrings( NameStrs, compression::zlib::isAvailable() && doCompression, Result); } -Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) { - const uint8_t *P = NameStrings.bytes_begin(); - const uint8_t *EndP = NameStrings.bytes_end(); - while (P < EndP) { - uint32_t N; - uint64_t UncompressedSize = decodeULEB128(P, &N); - P += N; - uint64_t CompressedSize = decodeULEB128(P, &N); - P += N; - bool isCompressed = (CompressedSize != 0); - SmallVector UncompressedNameStrings; - StringRef NameStrings; - if (isCompressed) { - if (!llvm::compression::zlib::isAvailable()) - return make_error(instrprof_error::zlib_unavailable); - - if (Error E = compression::zlib::decompress(ArrayRef(P, CompressedSize), - UncompressedNameStrings, - UncompressedSize)) { - consumeError(std::move(E)); - return make_error(instrprof_error::uncompress_failed); - } - P += CompressedSize; - NameStrings = toStringRef(UncompressedNameStrings); - } else { - NameStrings = - StringRef(reinterpret_cast(P), UncompressedSize); - P += UncompressedSize; - } - // Now parse the name strings. 
- SmallVector Names; - NameStrings.split(Names, getInstrProfNameSeparator()); - for (StringRef &Name : Names) - if (Error E = Symtab.addFuncName(Name)) - return E; - - while (P < EndP && *P == 0) - P++; - } - return Error::success(); +Error collectVTableStrings(ArrayRef VTables, + std::string &Result, bool DoCompression) { + std::vector VTableNameStrs; + for (auto *VTable : VTables) + VTableNameStrs.push_back(getPGOName(*VTable)); + return collectGlobalObjectNameStrings( + VTableNameStrs, compression::zlib::isAvailable() && DoCompression, + Result); } void InstrProfRecord::accumulateCounts(CountSumOrPercent &Sum) const { @@ -732,6 +1008,18 @@ void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight, Warn(instrprof_error::counter_overflow); } + // If the number of bitmap bytes doesn't match we either have bad data + // or a hash collision. + if (BitmapBytes.size() != Other.BitmapBytes.size()) { + Warn(instrprof_error::bitmap_mismatch); + return; + } + + // Bitmap bytes are merged by simply ORing them together. 
+ for (size_t I = 0, E = Other.BitmapBytes.size(); I < E; ++I) { + BitmapBytes[I] = Other.BitmapBytes[I] | BitmapBytes[I]; + } + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) mergeValueProfData(Kind, Other, Weight, Warn); } @@ -769,6 +1057,9 @@ uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind, if (ValueKind == IPVK_IndirectCallTarget) return SymTab->getFunctionHashFromAddress(Value); + if (ValueKind == IPVK_VTableTarget) + return SymTab->getVTableHashFromAddress(Value); + return Value; } @@ -1088,46 +1379,44 @@ void annotateValueSite(Module &M, Instruction &Inst, Inst.setMetadata(LLVMContext::MD_prof, MDNode::get(Ctx, Vals)); } -bool getValueProfDataFromInst(const Instruction &Inst, - InstrProfValueKind ValueKind, - uint32_t MaxNumValueData, - InstrProfValueData ValueData[], - uint32_t &ActualNumValueData, uint64_t &TotalC, - bool GetNoICPValue) { +MDNode *mayHaveValueProfileOfKind(const Instruction &Inst, + InstrProfValueKind ValueKind) { MDNode *MD = Inst.getMetadata(LLVMContext::MD_prof); if (!MD) - return false; + return nullptr; - unsigned NOps = MD->getNumOperands(); + if (MD->getNumOperands() < 5) + return nullptr; - if (NOps < 5) - return false; - - // Operand 0 is a string tag "VP": MDString *Tag = cast(MD->getOperand(0)); - if (!Tag) - return false; - - if (!Tag->getString().equals("VP")) - return false; + if (!Tag || !Tag->getString().equals("VP")) + return nullptr; // Now check kind: ConstantInt *KindInt = mdconst::dyn_extract(MD->getOperand(1)); if (!KindInt) - return false; + return nullptr; if (KindInt->getZExtValue() != ValueKind) - return false; + return nullptr; + + return MD; +} +static bool getValueProfDataFromInstImpl(const MDNode *const MD, + const uint32_t MaxNumDataWant, + InstrProfValueData ValueData[], + uint32_t &ActualNumValueData, + uint64_t &TotalC, bool GetNoICPValue) { + const unsigned NOps = MD->getNumOperands(); // Get total count ConstantInt *TotalCInt = mdconst::dyn_extract(MD->getOperand(2)); 
if (!TotalCInt) return false; TotalC = TotalCInt->getZExtValue(); - ActualNumValueData = 0; for (unsigned I = 3; I < NOps; I += 2) { - if (ActualNumValueData >= MaxNumValueData) + if (ActualNumValueData >= MaxNumDataWant) break; ConstantInt *Value = mdconst::dyn_extract(MD->getOperand(I)); ConstantInt *Count = @@ -1144,24 +1433,66 @@ bool getValueProfDataFromInst(const Instruction &Inst, return true; } +std::unique_ptr +getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, + uint32_t MaxNumValueData, uint32_t &ActualNumValueData, + uint64_t &TotalC, bool GetNoICPValue) { + MDNode *MD = mayHaveValueProfileOfKind(Inst, ValueKind); + if (!MD) + return nullptr; + auto ValueDataArray = std::make_unique(MaxNumValueData); + if (!getValueProfDataFromInstImpl(MD, MaxNumValueData, ValueDataArray.get(), + ActualNumValueData, TotalC, GetNoICPValue)) + return nullptr; + return ValueDataArray; +} + +// FIXME: Migrate existing callers to the function above that returns an +// array. +bool getValueProfDataFromInst(const Instruction &Inst, + InstrProfValueKind ValueKind, + uint32_t MaxNumValueData, + InstrProfValueData ValueData[], + uint32_t &ActualNumValueData, uint64_t &TotalC, + bool GetNoICPValue) { + MDNode *MD = mayHaveValueProfileOfKind(Inst, ValueKind); + if (!MD) + return false; + return getValueProfDataFromInstImpl(MD, MaxNumValueData, ValueData, + ActualNumValueData, TotalC, + GetNoICPValue); +} + MDNode *getPGOFuncNameMetadata(const Function &F) { return F.getMetadata(getPGOFuncNameMetadataName()); } -void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) { - // Only for internal linkage functions. - if (PGOFuncName == F.getName()) - return; - // Don't create duplicated meta-data. - if (getPGOFuncNameMetadata(F)) +static void createPGONameMetadata(GlobalObject &GO, StringRef MetadataName, + StringRef PGOName) { + // Only for internal linkage functions or global variables. 
The name is not + // the same as PGO name for these global objects. + if (GO.getName() == PGOName) + return; + + // Don't create duplicated metadata. + if (GO.getMetadata(MetadataName)) return; - LLVMContext &C = F.getContext(); - MDNode *N = MDNode::get(C, MDString::get(C, PGOFuncName)); - F.setMetadata(getPGOFuncNameMetadataName(), N); + + LLVMContext &C = GO.getContext(); + MDNode *N = MDNode::get(C, MDString::get(C, PGOName)); + GO.setMetadata(MetadataName, N); +} + +void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) { + return createPGONameMetadata(F, getPGOFuncNameMetadataName(), PGOFuncName); } -bool needsComdatForCounter(const Function &F, const Module &M) { - if (F.hasComdat()) +void createPGONameMetadata(GlobalObject &GO, StringRef PGOName) { + return createPGONameMetadata(GO, getPGONameMetadataName(), PGOName); +} + +bool needsComdatForCounter(const GlobalObject &GO, const Module &M) { + if (GO.hasComdat()) return true; if (!Triple(M.getTargetTriple()).supportsCOMDAT()) @@ -1177,7 +1508,7 @@ bool needsComdatForCounter(const Function &F, const Module &M) { // available_externally functions will end up being duplicated in raw profile // data. This can result in distorted profile as the counts of those dups // will be accumulated by the profile merger. 
- GlobalValue::LinkageTypes Linkage = F.getLinkage(); + GlobalValue::LinkageTypes Linkage = GO.getLinkage(); if (Linkage != GlobalValue::ExternalWeakLinkage && Linkage != GlobalValue::AvailableExternallyLinkage) return false; @@ -1333,7 +1664,7 @@ void OverlapStats::dump(raw_fd_ostream &OS) const { for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) { if (Base.ValueCounts[I] < 1.0f && Test.ValueCounts[I] < 1.0f) continue; - char ProfileKindName[20]; + char ProfileKindName[20] = {0}; switch (I) { case IPVK_IndirectCallTarget: strncpy(ProfileKindName, "IndirectCall", 19); @@ -1341,6 +1672,9 @@ void OverlapStats::dump(raw_fd_ostream &OS) const { case IPVK_MemOPSize: strncpy(ProfileKindName, "MemOP", 19); break; + case IPVK_VTableTarget: + strncpy(ProfileKindName, "VTable", 19); + break; default: snprintf(ProfileKindName, 19, "VP[%d]", I); break; @@ -1404,9 +1738,14 @@ Expected
Header::readFromBuffer(const unsigned char *Buffer) { // When a new field is added in the header add a case statement here to // populate it. static_assert( - IndexedInstrProf::ProfVersion::CurrentVersion == Version10, + IndexedInstrProf::ProfVersion::CurrentVersion == Version12, "Please update the reading code below if a new field has been added, " "if not add a case statement to fall through to the latest version."); + case 12ull: + H.VTableNamesOffset = read(Buffer, offsetOf(&Header::VTableNamesOffset)); + [[fallthrough]]; + case 11ull: + [[fallthrough]]; case 10ull: H.TemporalProfTracesOffset = read(Buffer, offsetOf(&Header::TemporalProfTracesOffset)); @@ -1430,10 +1769,16 @@ size_t Header::size() const { // When a new field is added to the header add a case statement here to // compute the size as offset of the new field + size of the new field. This // relies on the field being added to the end of the list. - static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version10, + static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version12, "Please update the size computation below if a new field has " "been added to the header, if not add a case statement to " "fall through to the latest version."); + case 12ull: + return offsetOf(&Header::VTableNamesOffset) + + sizeof(Header::VTableNamesOffset); + [[fallthrough]]; + case 11ull: + [[fallthrough]]; case 10ull: return offsetOf(&Header::TemporalProfTracesOffset) + sizeof(Header::TemporalProfTracesOffset); diff --git a/llvm/lib/ProfileData/InstrProfCorrelator.cpp b/llvm/lib/ProfileData/InstrProfCorrelator.cpp index c822d81f8bef1645c0aa3852753b6c81e0cab090..f5a7c75e564f111ab34a4d384056981c2be7432a 100644 --- a/llvm/lib/ProfileData/InstrProfCorrelator.cpp +++ b/llvm/lib/ProfileData/InstrProfCorrelator.cpp @@ -22,15 +22,28 @@ using namespace llvm; -/// Get the __llvm_prf_cnts section. 
-Expected getCountersSection(const object::ObjectFile &Obj) { - for (auto &Section : Obj.sections()) +/// Get profile section. +Expected getInstrProfSection(const object::ObjectFile &Obj, + InstrProfSectKind IPSK) { + // On COFF, getInstrProfSectionName returns section names possibly followed + // by "$M". The linker removes the dollar and everything after it in the final + // binary. Do the same to match. + Triple::ObjectFormatType ObjFormat = Obj.getTripleObjectFormat(); + auto StripSuffix = [ObjFormat](StringRef N) { + return ObjFormat == Triple::COFF ? N.split('$').first : N; + }; + std::string ExpectedSectionName = + getInstrProfSectionName(IPSK, ObjFormat, + /*AddSegmentInfo=*/false); + ExpectedSectionName = StripSuffix(ExpectedSectionName); + for (auto &Section : Obj.sections()) { if (auto SectionName = Section.getName()) - if (SectionName.get() == INSTR_PROF_CNTS_SECT_NAME) + if (*SectionName == ExpectedSectionName) return Section; + } return make_error( instrprof_error::unable_to_correlate_profile, - "could not find counter section (" INSTR_PROF_CNTS_SECT_NAME ")"); + "could not find section (" + Twine(ExpectedSectionName) + ")"); } const char *InstrProfCorrelator::FunctionNameAttributeName = "Function Name"; @@ -39,56 +52,95 @@ const char *InstrProfCorrelator::NumCountersAttributeName = "Num Counters"; llvm::Expected> InstrProfCorrelator::Context::get(std::unique_ptr Buffer, - const object::ObjectFile &Obj) { - auto CountersSection = getCountersSection(Obj); + const object::ObjectFile &Obj, + ProfCorrelatorKind FileKind) { + auto C = std::make_unique(); + auto CountersSection = getInstrProfSection(Obj, IPSK_cnts); if (auto Err = CountersSection.takeError()) return std::move(Err); - auto C = std::make_unique(); + if (FileKind == InstrProfCorrelator::BINARY) { + auto DataSection = getInstrProfSection(Obj, IPSK_covdata); + if (auto Err = DataSection.takeError()) + return std::move(Err); + auto DataOrErr = DataSection->getContents(); + if (!DataOrErr) + 
return DataOrErr.takeError(); + auto NameSection = getInstrProfSection(Obj, IPSK_covname); + if (auto Err = NameSection.takeError()) + return std::move(Err); + auto NameOrErr = NameSection->getContents(); + if (!NameOrErr) + return NameOrErr.takeError(); + C->DataStart = DataOrErr->data(); + C->DataEnd = DataOrErr->data() + DataOrErr->size(); + C->NameStart = NameOrErr->data(); + C->NameSize = NameOrErr->size(); + } C->Buffer = std::move(Buffer); C->CountersSectionStart = CountersSection->getAddress(); C->CountersSectionEnd = C->CountersSectionStart + CountersSection->getSize(); + // In COFF object file, there's a null byte at the beginning of the counter + // section which doesn't exist in raw profile. + if (Obj.getTripleObjectFormat() == Triple::COFF) + ++C->CountersSectionStart; + C->ShouldSwapBytes = Obj.isLittleEndian() != sys::IsLittleEndianHost; return Expected>(std::move(C)); } llvm::Expected> -InstrProfCorrelator::get(StringRef DebugInfoFilename) { - auto DsymObjectsOrErr = - object::MachOObjectFile::findDsymObjectMembers(DebugInfoFilename); - if (auto Err = DsymObjectsOrErr.takeError()) - return std::move(Err); - if (!DsymObjectsOrErr->empty()) { - // TODO: Enable profile correlation when there are multiple objects in a - // dSYM bundle. - if (DsymObjectsOrErr->size() > 1) - return make_error( - instrprof_error::unable_to_correlate_profile, - "using multiple objects is not yet supported"); - DebugInfoFilename = *DsymObjectsOrErr->begin(); +InstrProfCorrelator::get(StringRef Filename, ProfCorrelatorKind FileKind) { + if (FileKind == DEBUG_INFO) { + auto DsymObjectsOrErr = + object::MachOObjectFile::findDsymObjectMembers(Filename); + if (auto Err = DsymObjectsOrErr.takeError()) + return std::move(Err); + if (!DsymObjectsOrErr->empty()) { + // TODO: Enable profile correlation when there are multiple objects in a + // dSYM bundle. 
+ if (DsymObjectsOrErr->size() > 1) + return make_error( + instrprof_error::unable_to_correlate_profile, + "using multiple objects is not yet supported"); + Filename = *DsymObjectsOrErr->begin(); + } + auto BufferOrErr = errorOrToExpected(MemoryBuffer::getFile(Filename)); + if (auto Err = BufferOrErr.takeError()) + return std::move(Err); + + return get(std::move(*BufferOrErr), FileKind); } - auto BufferOrErr = - errorOrToExpected(MemoryBuffer::getFile(DebugInfoFilename)); - if (auto Err = BufferOrErr.takeError()) - return std::move(Err); + if (FileKind == BINARY) { + auto BufferOrErr = errorOrToExpected(MemoryBuffer::getFile(Filename)); + if (auto Err = BufferOrErr.takeError()) + return std::move(Err); - return get(std::move(*BufferOrErr)); + return get(std::move(*BufferOrErr), FileKind); + } + return make_error( + instrprof_error::unable_to_correlate_profile, + "unsupported correlation kind (only DWARF debug info and Binary format " + "(ELF/COFF) are supported)"); } llvm::Expected> -InstrProfCorrelator::get(std::unique_ptr Buffer) { +InstrProfCorrelator::get(std::unique_ptr Buffer, + ProfCorrelatorKind FileKind) { auto BinOrErr = object::createBinary(*Buffer); if (auto Err = BinOrErr.takeError()) return std::move(Err); if (auto *Obj = dyn_cast(BinOrErr->get())) { - auto CtxOrErr = Context::get(std::move(Buffer), *Obj); + auto CtxOrErr = Context::get(std::move(Buffer), *Obj, FileKind); if (auto Err = CtxOrErr.takeError()) return std::move(Err); auto T = Obj->makeTriple(); if (T.isArch64Bit()) - return InstrProfCorrelatorImpl::get(std::move(*CtxOrErr), *Obj); + return InstrProfCorrelatorImpl::get(std::move(*CtxOrErr), *Obj, + FileKind); if (T.isArch32Bit()) - return InstrProfCorrelatorImpl::get(std::move(*CtxOrErr), *Obj); + return InstrProfCorrelatorImpl::get(std::move(*CtxOrErr), *Obj, + FileKind); } return make_error( instrprof_error::unable_to_correlate_profile, "not an object file"); @@ -130,29 +182,35 @@ template llvm::Expected>> InstrProfCorrelatorImpl::get( 
std::unique_ptr Ctx, - const object::ObjectFile &Obj) { - if (Obj.isELF() || Obj.isMachO()) { - auto DICtx = DWARFContext::create(Obj); - return std::make_unique>(std::move(DICtx), - std::move(Ctx)); + const object::ObjectFile &Obj, ProfCorrelatorKind FileKind) { + if (FileKind == DEBUG_INFO) { + if (Obj.isELF() || Obj.isMachO()) { + auto DICtx = DWARFContext::create(Obj); + return std::make_unique>( + std::move(DICtx), std::move(Ctx)); + } + return make_error( + instrprof_error::unable_to_correlate_profile, + "unsupported debug info format (only DWARF is supported)"); } + if (Obj.isELF() || Obj.isCOFF()) + return std::make_unique>(std::move(Ctx)); return make_error( instrprof_error::unable_to_correlate_profile, - "unsupported debug info format (only DWARF is supported)"); + "unsupported binary format (only ELF and COFF are supported)"); } template -Error InstrProfCorrelatorImpl::correlateProfileData() { +Error InstrProfCorrelatorImpl::correlateProfileData(int MaxWarnings) { assert(Data.empty() && Names.empty() && NamesVec.empty()); - correlateProfileDataImpl(); - if (Data.empty() || NamesVec.empty()) + correlateProfileDataImpl(MaxWarnings); + if (this->Data.empty()) return make_error( instrprof_error::unable_to_correlate_profile, - "could not find any profile metadata in debug info"); - auto Result = - collectPGOFuncNameStrings(NamesVec, /*doCompression=*/false, Names); - CounterOffsets.clear(); - NamesVec.clear(); + "could not find any profile data metadata in correlated file"); + Error Result = correlateProfileNameImpl(); + this->CounterOffsets.clear(); + this->NamesVec.clear(); return Result; } @@ -180,40 +238,44 @@ template <> struct yaml::SequenceElementTraits { }; template -Error InstrProfCorrelatorImpl::dumpYaml(raw_ostream &OS) { +Error InstrProfCorrelatorImpl::dumpYaml(int MaxWarnings, + raw_ostream &OS) { InstrProfCorrelator::CorrelationData Data; - correlateProfileDataImpl(&Data); + correlateProfileDataImpl(MaxWarnings, &Data); if (Data.Probes.empty()) 
return make_error( instrprof_error::unable_to_correlate_profile, - "could not find any profile metadata in debug info"); + "could not find any profile data metadata in debug info"); yaml::Output YamlOS(OS); YamlOS << Data; return Error::success(); } template -void InstrProfCorrelatorImpl::addProbe(StringRef FunctionName, - uint64_t CFGHash, - IntPtrT CounterOffset, - IntPtrT FunctionPtr, - uint32_t NumCounters) { +void InstrProfCorrelatorImpl::addDataProbe(uint64_t NameRef, + uint64_t CFGHash, + IntPtrT CounterOffset, + IntPtrT FunctionPtr, + uint32_t NumCounters) { // Check if a probe was already added for this counter offset. if (!CounterOffsets.insert(CounterOffset).second) return; Data.push_back({ - maybeSwap(IndexedInstrProf::ComputeHash(FunctionName)), + maybeSwap(NameRef), maybeSwap(CFGHash), // In this mode, CounterPtr actually stores the section relative address // of the counter. maybeSwap(CounterOffset), + // TODO: MC/DC is not yet supported. + /*BitmapOffset=*/maybeSwap(0), maybeSwap(FunctionPtr), // TODO: Value profiling is not yet supported. /*ValuesPtr=*/maybeSwap(0), maybeSwap(NumCounters), /*NumValueSites=*/{maybeSwap(0), maybeSwap(0)}, + // TODO: MC/DC is not yet supported. 
+ /*NumBitmapBytes=*/maybeSwap(0), }); - NamesVec.push_back(FunctionName.str()); } template @@ -260,7 +322,10 @@ bool DwarfInstrProfCorrelator::isDIEOfProbe(const DWARFDie &Die) { template void DwarfInstrProfCorrelator::correlateProfileDataImpl( - InstrProfCorrelator::CorrelationData *Data) { + int MaxWarnings, InstrProfCorrelator::CorrelationData *Data) { + bool UnlimitedWarnings = (MaxWarnings == 0); + // -N suppressed warnings means we can emit up to N (unsuppressed) warnings + int NumSuppressedWarnings = -MaxWarnings; auto maybeAddProbe = [&](DWARFDie Die) { if (!isDIEOfProbe(Die)) return; @@ -297,30 +362,34 @@ void DwarfInstrProfCorrelator::correlateProfileDataImpl( } } if (!FunctionName || !CFGHash || !CounterPtr || !NumCounters) { - LLVM_DEBUG(dbgs() << "Incomplete DIE for probe\n\tFunctionName: " - << FunctionName << "\n\tCFGHash: " << CFGHash - << "\n\tCounterPtr: " << CounterPtr - << "\n\tNumCounters: " << NumCounters); - LLVM_DEBUG(Die.dump(dbgs())); + if (UnlimitedWarnings || ++NumSuppressedWarnings < 1) { + WithColor::warning() + << "Incomplete DIE for function " << FunctionName + << ": CFGHash=" << CFGHash << " CounterPtr=" << CounterPtr + << " NumCounters=" << NumCounters << "\n"; + LLVM_DEBUG(Die.dump(dbgs())); + } return; } uint64_t CountersStart = this->Ctx->CountersSectionStart; uint64_t CountersEnd = this->Ctx->CountersSectionEnd; if (*CounterPtr < CountersStart || *CounterPtr >= CountersEnd) { - LLVM_DEBUG( - dbgs() << "CounterPtr out of range for probe\n\tFunction Name: " - << FunctionName << "\n\tExpected: [0x" - << Twine::utohexstr(CountersStart) << ", 0x" - << Twine::utohexstr(CountersEnd) << ")\n\tActual: 0x" - << Twine::utohexstr(*CounterPtr)); - LLVM_DEBUG(Die.dump(dbgs())); + if (UnlimitedWarnings || ++NumSuppressedWarnings < 1) { + WithColor::warning() + << format("CounterPtr out of range for function %s: Actual=0x%x " + "Expected=[0x%x, 0x%x)\n", + *FunctionName, *CounterPtr, CountersStart, CountersEnd); + 
LLVM_DEBUG(Die.dump(dbgs())); + } return; } - if (!FunctionPtr) { - LLVM_DEBUG(dbgs() << "Could not find address of " << *FunctionName - << "\n"); + if (!FunctionPtr && (UnlimitedWarnings || ++NumSuppressedWarnings < 1)) { + WithColor::warning() << format("Could not find address of function %s\n", + *FunctionName); LLVM_DEBUG(Die.dump(dbgs())); } + // In debug info correlation mode, the CounterPtr is an absolute address of + // the counter, but it's expected to be relative later when iterating Data. IntPtrT CounterOffset = *CounterPtr - CountersStart; if (Data) { InstrProfCorrelator::Probe P; @@ -338,8 +407,9 @@ void DwarfInstrProfCorrelator::correlateProfileDataImpl( P.LineNumber = LineNumber; Data->Probes.push_back(P); } else { - this->addProbe(*FunctionName, *CFGHash, CounterOffset, - FunctionPtr.value_or(0), *NumCounters); + this->addDataProbe(IndexedInstrProf::ComputeHash(*FunctionName), *CFGHash, + CounterOffset, FunctionPtr.value_or(0), *NumCounters); + this->NamesVec.push_back(*FunctionName); } }; for (auto &CU : DICtx->normal_units()) @@ -348,4 +418,64 @@ void DwarfInstrProfCorrelator::correlateProfileDataImpl( for (auto &CU : DICtx->dwo_units()) for (const auto &Entry : CU->dies()) maybeAddProbe(DWARFDie(CU.get(), &Entry)); + + if (!UnlimitedWarnings && NumSuppressedWarnings > 0) + WithColor::warning() << format("Suppressed %d additional warnings\n", + NumSuppressedWarnings); +} + +template +Error DwarfInstrProfCorrelator::correlateProfileNameImpl() { + if (this->NamesVec.empty()) { + return make_error( + instrprof_error::unable_to_correlate_profile, + "could not find any profile name metadata in debug info"); + } + auto Result = + collectGlobalObjectNameStrings(this->NamesVec, + /*doCompression=*/false, this->Names); + return Result; +} + +template +void BinaryInstrProfCorrelator::correlateProfileDataImpl( + int MaxWarnings, InstrProfCorrelator::CorrelationData *CorrelateData) { + using RawProfData = RawInstrProf::ProfileData; + bool UnlimitedWarnings = 
(MaxWarnings == 0); + // -N suppressed warnings means we can emit up to N (unsuppressed) warnings + int NumSuppressedWarnings = -MaxWarnings; + + const RawProfData *DataStart = (const RawProfData *)this->Ctx->DataStart; + const RawProfData *DataEnd = (const RawProfData *)this->Ctx->DataEnd; + // We need to use < here because the last data record may have no padding. + for (const RawProfData *I = DataStart; I < DataEnd; ++I) { + uint64_t CounterPtr = this->template maybeSwap(I->CounterPtr); + uint64_t CountersStart = this->Ctx->CountersSectionStart; + uint64_t CountersEnd = this->Ctx->CountersSectionEnd; + if (CounterPtr < CountersStart || CounterPtr >= CountersEnd) { + if (UnlimitedWarnings || ++NumSuppressedWarnings < 1) { + WithColor::warning() + << format("CounterPtr out of range for function: Actual=0x%x " + "Expected=[0x%x, 0x%x) at data offset=0x%x\n", + CounterPtr, CountersStart, CountersEnd, + (I - DataStart) * sizeof(RawProfData)); + } + } + // In binary correlation mode, the CounterPtr is an absolute address of the + // counter, but it's expected to be relative later when iterating Data. 
+ IntPtrT CounterOffset = CounterPtr - CountersStart; + this->addDataProbe(I->NameRef, I->FuncHash, CounterOffset, + I->FunctionPointer, I->NumCounters); + } +} + +template +Error BinaryInstrProfCorrelator::correlateProfileNameImpl() { + if (this->Ctx->NameSize == 0) { + return make_error( + instrprof_error::unable_to_correlate_profile, + "could not find any profile data metadata in object file"); + } + this->Names.append(this->Ctx->NameStart, this->Ctx->NameSize); + return Error::success(); } diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 4160f7e6dfd557a8902ec2cf5894a1f1674f24c2..5eb4b6e1ff15dc75a346cdb8ea798a6f5fab19d4 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -38,7 +38,7 @@ using namespace llvm; -// Extracts the variant information from the top 8 bits in the version and +// Extracts the variant information from the top 32 bits in the version and // returns an enum specifying the variants present. 
static InstrProfKind getProfileKindFromVersion(uint64_t Version) { InstrProfKind ProfileKind = InstrProfKind::Unknown; @@ -141,42 +141,33 @@ readBinaryIdsInternal(const MemoryBuffer &DataBuffer, return Error::success(); } -static Error printBinaryIdsInternal(raw_ostream &OS, - const MemoryBuffer &DataBuffer, - uint64_t BinaryIdsSize, - const uint8_t *BinaryIdsStart, - llvm::support::endianness Endian) { - if (BinaryIdsSize == 0) - return Error::success(); - - std::vector BinaryIds; - if (Error E = readBinaryIdsInternal(DataBuffer, BinaryIdsSize, BinaryIdsStart, - BinaryIds, Endian)) - return E; - +static void +printBinaryIdsInternal(raw_ostream &OS, + std::vector &BinaryIds) { OS << "Binary IDs: \n"; for (auto BI : BinaryIds) { for (uint64_t I = 0; I < BI.size(); I++) OS << format("%02x", BI[I]); OS << "\n"; } - - return Error::success(); } Expected> InstrProfReader::create(const Twine &Path, vfs::FileSystem &FS, - const InstrProfCorrelator *Correlator) { + const InstrProfCorrelator *Correlator, + std::function Warn) { // Set up the buffer to read. 
auto BufferOrError = setupMemoryBuffer(Path, FS); if (Error E = BufferOrError.takeError()) return std::move(E); - return InstrProfReader::create(std::move(BufferOrError.get()), Correlator); + return InstrProfReader::create(std::move(BufferOrError.get()), Correlator, + Warn); } Expected> InstrProfReader::create(std::unique_ptr Buffer, - const InstrProfCorrelator *Correlator) { + const InstrProfCorrelator *Correlator, + std::function Warn) { if (Buffer->getBufferSize() == 0) return make_error(instrprof_error::empty_raw_profile); @@ -185,9 +176,9 @@ InstrProfReader::create(std::unique_ptr Buffer, if (IndexedInstrProfReader::hasFormat(*Buffer)) Result.reset(new IndexedInstrProfReader(std::move(Buffer))); else if (RawInstrProfReader64::hasFormat(*Buffer)) - Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator)); + Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator, Warn)); else if (RawInstrProfReader32::hasFormat(*Buffer)) - Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator)); + Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator, Warn)); else if (TextInstrProfReader::hasFormat(*Buffer)) Result.reset(new TextInstrProfReader(std::move(Buffer))); else @@ -267,6 +258,8 @@ Error TextInstrProfReader::readHeader() { ProfileKind |= InstrProfKind::FunctionEntryInstrumentation; else if (Str.equals_insensitive("not_entry_first")) ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation; + else if (Str.equals_insensitive("single_byte_coverage")) + ProfileKind |= InstrProfKind::SingleByteCoverage; else if (Str.equals_insensitive("temporal_prof_traces")) { ProfileKind |= InstrProfKind::TemporalProfile; if (auto Err = readTemporalProfTraceData()) @@ -372,6 +365,14 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { return E; Value = IndexedInstrProf::ComputeHash(VD.first); } + } else if (ValueKind == IPVK_VTableTarget) { + if (InstrProfSymtab::isExternalSymbol(VD.first)) + Value = 0; + else { + 
if (Error E = Symtab->addVTableName(VD.first)) + return E; + Value = IndexedInstrProf::ComputeHash(VD.first); + } } else { READ_NUM(VD.first, Value); } @@ -433,6 +434,29 @@ Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { Record.Counts.push_back(Count); } + // Bitmap byte information is indicated with special character. + if (Line->startswith("$")) { + Record.BitmapBytes.clear(); + // Read the number of bitmap bytes. + uint64_t NumBitmapBytes; + if ((Line++)->drop_front(1).trim().getAsInteger(0, NumBitmapBytes)) + return error(instrprof_error::malformed, + "number of bitmap bytes is not a valid integer"); + if (NumBitmapBytes != 0) { + // Read each bitmap and fill our internal storage with the values. + Record.BitmapBytes.reserve(NumBitmapBytes); + for (uint8_t I = 0; I < NumBitmapBytes; ++I) { + if (Line.is_at_end()) + return error(instrprof_error::truncated); + uint8_t BitmapByte; + if ((Line++)->getAsInteger(0, BitmapByte)) + return error(instrprof_error::malformed, + "bitmap byte is not a valid integer"); + Record.BitmapBytes.push_back(BitmapByte); + } + } + } + // Check if value profile data exists and read it if so. 
if (Error E = readValueProfileData(Record)) return error(std::move(E)); @@ -471,7 +495,7 @@ bool RawInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { uint64_t Magic = *reinterpret_cast(DataBuffer.getBufferStart()); return RawInstrProf::getMagic() == Magic || - sys::getSwappedBytes(RawInstrProf::getMagic()) == Magic; + llvm::byteswap(RawInstrProf::getMagic()) == Magic; } template @@ -516,13 +540,29 @@ Error RawInstrProfReader::readNextHeader(const char *CurrentPos) { template Error RawInstrProfReader::createSymtab(InstrProfSymtab &Symtab) { - if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart))) + if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart), + StringRef(VNamesStart, VNamesEnd - VNamesStart))) return error(std::move(E)); for (const RawInstrProf::ProfileData *I = Data; I != DataEnd; ++I) { const IntPtrT FPtr = swap(I->FunctionPointer); if (!FPtr) continue; - Symtab.mapAddress(FPtr, I->NameRef); + Symtab.mapAddress(FPtr, swap(I->NameRef)); + } + + if (VTableBegin != nullptr && VTableEnd != nullptr) { + for (const RawInstrProf::VTableProfileData *I = VTableBegin; + I != VTableEnd; ++I) { + const IntPtrT VPtr = swap(I->VTablePointer); + if (!VPtr) + continue; + // Map both begin and end address to the name hash, since the instrumented + // address could be somewhere in the middle. + // VPtr is of type uint32_t or uint64_t so 'VPtr + I->VTableSize' marks + // the end of vtable address. 
+ Symtab.mapVTableAddress(VPtr, VPtr + swap(I->VTableSize), + swap(I->VTableNameHash)); + } } return success(); } @@ -539,33 +579,57 @@ Error RawInstrProfReader::readHeader( "\nPLEASE update this tool to version in the raw profile, or " "regenerate raw profile with expected version.") .str()); - if (useDebugInfoCorrelate() && !Correlator) - return error(instrprof_error::missing_debug_info_for_correlation); - if (!useDebugInfoCorrelate() && Correlator) - return error(instrprof_error::unexpected_debug_info_for_correlation); - BinaryIdsSize = swap(Header.BinaryIdsSize); - if (BinaryIdsSize % sizeof(uint64_t)) + uint64_t BinaryIdSize = swap(Header.BinaryIdsSize); + // Binary id start just after the header if exists. + const uint8_t *BinaryIdStart = + reinterpret_cast(&Header) + sizeof(RawInstrProf::Header); + const uint8_t *BinaryIdEnd = BinaryIdStart + BinaryIdSize; + const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd(); + if (BinaryIdSize % sizeof(uint64_t) || BinaryIdEnd > BufferEnd) return error(instrprof_error::bad_header); + if (BinaryIdSize != 0) { + if (Error Err = + readBinaryIdsInternal(*DataBuffer, BinaryIdSize, BinaryIdStart, + BinaryIds, getDataEndianness())) + return Err; + } CountersDelta = swap(Header.CountersDelta); + BitmapDelta = swap(Header.BitmapDelta); NamesDelta = swap(Header.NamesDelta); - auto NumData = swap(Header.DataSize); + auto NumData = swap(Header.NumData); auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters); - auto CountersSize = swap(Header.CountersSize) * getCounterTypeSize(); + auto CountersSize = swap(Header.NumCounters) * getCounterTypeSize(); auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); + auto NumBitmapBytes = swap(Header.NumBitmapBytes); + auto PaddingBytesAfterBitmapBytes = swap(Header.PaddingBytesAfterBitmapBytes); auto NamesSize = swap(Header.NamesSize); + auto VTableNameSize = swap(Header.VNamesSize); + auto NumVTables = swap(Header.NumVTables); ValueKindLast 
= swap(Header.ValueKindLast); auto DataSize = NumData * sizeof(RawInstrProf::ProfileData); - auto PaddingSize = getNumPaddingBytes(NamesSize); + auto PaddingBytesAfterNames = getNumPaddingBytes(NamesSize); + auto PaddingBytesAfterVTableNames = getNumPaddingBytes(VTableNameSize); + + auto VTableSectionSize = + NumVTables * sizeof(RawInstrProf::VTableProfileData); + auto PaddingBytesAfterVTableProfData = getNumPaddingBytes(VTableSectionSize); // Profile data starts after profile header and binary ids if exist. - ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize; + ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdSize; ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters; - ptrdiff_t NamesOffset = + ptrdiff_t BitmapOffset = CountersOffset + CountersSize + PaddingBytesAfterCounters; - ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize; + ptrdiff_t NamesOffset = + BitmapOffset + NumBitmapBytes + PaddingBytesAfterBitmapBytes; + ptrdiff_t VTableProfDataOffset = + NamesOffset + NamesSize + PaddingBytesAfterNames; + ptrdiff_t VTableNameOffset = VTableProfDataOffset + VTableSectionSize + + PaddingBytesAfterVTableProfData; + ptrdiff_t ValueDataOffset = + VTableNameOffset + VTableNameSize + PaddingBytesAfterVTableNames; auto *Start = reinterpret_cast(&Header); if (Start + ValueDataOffset > DataBuffer->getBufferEnd()) @@ -574,8 +638,9 @@ Error RawInstrProfReader::readHeader( if (Correlator) { // These sizes in the raw file are zero because we constructed them in the // Correlator. 
- assert(DataSize == 0 && NamesSize == 0); - assert(CountersDelta == 0 && NamesDelta == 0); + if (!(DataSize == 0 && NamesSize == 0 && CountersDelta == 0 && + NamesDelta == 0)) + return error(instrprof_error::unexpected_correlation_info); Data = Correlator->getDataPointer(); DataEnd = Data + Correlator->getDataSize(); NamesStart = Correlator->getNamesPointer(); @@ -584,21 +649,22 @@ Error RawInstrProfReader::readHeader( Data = reinterpret_cast *>( Start + DataOffset); DataEnd = Data + NumData; + VTableBegin = + reinterpret_cast *>( + Start + VTableProfDataOffset); + VTableEnd = VTableBegin + NumVTables; NamesStart = Start + NamesOffset; NamesEnd = NamesStart + NamesSize; + VNamesStart = Start + VTableNameOffset; + VNamesEnd = VNamesStart + VTableNameSize; } - // Binary ids start just after the header. - BinaryIdsStart = - reinterpret_cast(&Header) + sizeof(RawInstrProf::Header); CountersStart = Start + CountersOffset; CountersEnd = CountersStart + CountersSize; + BitmapStart = Start + BitmapOffset; + BitmapEnd = BitmapStart + NumBitmapBytes; ValueDataStart = reinterpret_cast(Start + ValueDataOffset); - const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd(); - if (BinaryIdsStart + BinaryIdsSize > BufferEnd) - return error(instrprof_error::bad_header); - std::unique_ptr NewSymtab = std::make_unique(); if (Error E = createSymtab(*NewSymtab)) return E; @@ -675,14 +741,61 @@ Error RawInstrProfReader::readRawCounts( // A value of zero signifies the block is covered. Record.Counts.push_back(*Ptr == 0 ? 
1 : 0); } else { - const auto *CounterValue = reinterpret_cast(Ptr); - Record.Counts.push_back(swap(*CounterValue)); + uint64_t CounterValue = swap(*reinterpret_cast(Ptr)); + if (CounterValue > MaxCounterValue && Warn) + Warn(make_error( + instrprof_error::counter_value_too_large, Twine(CounterValue))); + + Record.Counts.push_back(CounterValue); } } return success(); } +template +Error RawInstrProfReader::readRawBitmapBytes(InstrProfRecord &Record) { + uint32_t NumBitmapBytes = swap(Data->NumBitmapBytes); + + Record.BitmapBytes.clear(); + Record.BitmapBytes.reserve(NumBitmapBytes); + + // It's possible MCDC is either not enabled or only used for some functions + // and not others. So if we record 0 bytes, just move on. + if (NumBitmapBytes == 0) + return success(); + + // BitmapDelta decreases as we advance to the next data record. + ptrdiff_t BitmapOffset = swap(Data->BitmapPtr) - BitmapDelta; + if (BitmapOffset < 0) + return error( + instrprof_error::malformed, + ("bitmap offset " + Twine(BitmapOffset) + " is negative").str()); + + if (BitmapOffset >= BitmapEnd - BitmapStart) + return error(instrprof_error::malformed, + ("bitmap offset " + Twine(BitmapOffset) + + " is greater than the maximum bitmap offset " + + Twine(BitmapEnd - BitmapStart - 1)) + .str()); + + uint64_t MaxNumBitmapBytes = + (BitmapEnd - (BitmapStart + BitmapOffset)) / sizeof(uint8_t); + if (NumBitmapBytes > MaxNumBitmapBytes) + return error(instrprof_error::malformed, + ("number of bitmap bytes " + Twine(NumBitmapBytes) + + " is greater than the maximum number of bitmap bytes " + + Twine(MaxNumBitmapBytes)) + .str()); + + for (uint32_t I = 0; I < NumBitmapBytes; I++) { + const char *Ptr = BitmapStart + BitmapOffset + I; + Record.BitmapBytes.push_back(swap(*Ptr)); + } + + return success(); +} + template Error RawInstrProfReader::readValueProfilingData( InstrProfRecord &Record) { @@ -733,6 +846,10 @@ Error RawInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) if (Error E = 
readRawCounts(Record)) return error(std::move(E)); + // Read raw bitmap bytes and set Record. + if (Error E = readRawBitmapBytes(Record)) + return error(std::move(E)); + // Read value data and set Record. if (Error E = readValueProfilingData(Record)) return error(std::move(E)); @@ -745,14 +862,16 @@ Error RawInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) template Error RawInstrProfReader::readBinaryIds( std::vector &BinaryIds) { - return readBinaryIdsInternal(*DataBuffer, BinaryIdsSize, BinaryIdsStart, - BinaryIds, getDataEndianness()); + BinaryIds.insert(BinaryIds.begin(), this->BinaryIds.begin(), + this->BinaryIds.end()); + return Error::success(); } template Error RawInstrProfReader::printBinaryIds(raw_ostream &OS) { - return printBinaryIdsInternal(OS, *DataBuffer, BinaryIdsSize, BinaryIdsStart, - getDataEndianness()); + if (!BinaryIds.empty()) + printBinaryIdsInternal(OS, BinaryIds); + return Error::success(); } namespace llvm { @@ -794,6 +913,7 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, DataBuffer.clear(); std::vector CounterBuffer; + std::vector BitmapByteBuffer; const unsigned char *End = D + N; while (D < End) { @@ -819,7 +939,24 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, for (uint64_t J = 0; J < CountsSize; ++J) CounterBuffer.push_back(endian::readNext(D)); - DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer)); + // Read bitmap bytes for GET_VERSION(FormatVersion) > 10. + if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version10) { + uint64_t BitmapBytes = 0; + if (D + sizeof(uint64_t) > End) + return data_type(); + BitmapBytes = endian::readNext(D); + // Read bitmap byte values. 
+ if (D + BitmapBytes * sizeof(uint8_t) > End) + return data_type(); + BitmapByteBuffer.clear(); + BitmapByteBuffer.reserve(BitmapBytes); + for (uint64_t J = 0; J < BitmapBytes; ++J) + BitmapByteBuffer.push_back(static_cast( + endian::readNext(D))); + } + + DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer), + std::move(BitmapByteBuffer)); // Read value profiling data. if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 && @@ -1150,6 +1287,21 @@ Error IndexedInstrProfReader::readHeader() { "corrupted binary ids"); } + if (GET_VERSION(Header->formatVersion()) >= 12) { + uint64_t VTableNamesOffset = + endian::byte_swap(Header->VTableNamesOffset); + const unsigned char *Ptr = Start + VTableNamesOffset; + + CompressedVTableNamesLen = + support::endian::readNext(Ptr); + + // Writer first writes the length of compressed string, and then the actual + // content. + VTableNamePtr = (const char *)Ptr; + if (VTableNamePtr > (const char *)DataBuffer->getBufferEnd()) + return make_error(instrprof_error::truncated); + } + if (GET_VERSION(Header->formatVersion()) >= 10 && Header->formatVersion() & VARIANT_MASK_TEMPORAL_PROF) { uint64_t TemporalProfTracesOffset = @@ -1203,7 +1355,15 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() { if (Symtab) return *Symtab; - std::unique_ptr NewSymtab = std::make_unique(); + auto NewSymtab = std::make_unique(); + + if (Error E = NewSymtab->initVTableNamesFromCompressedStrings( + StringRef(VTableNamePtr, CompressedVTableNamesLen))) { + auto [ErrCode, Msg] = InstrProfError::take(std::move(E)); + consumeError(error(ErrCode, Msg)); + } + + // finalizeSymtab is called inside populateSymtab. 
if (Error E = Index->populateSymtab(*NewSymtab)) { auto [ErrCode, Msg] = InstrProfError::take(std::move(E)); consumeError(error(ErrCode, Msg)); @@ -1214,12 +1374,25 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() { } Expected IndexedInstrProfReader::getInstrProfRecord( - StringRef FuncName, uint64_t FuncHash, uint64_t *MismatchedFuncSum) { + StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName, + uint64_t *MismatchedFuncSum) { ArrayRef Data; uint64_t FuncSum = 0; - Error Err = Remapper->getRecords(FuncName, Data); - if (Err) - return std::move(Err); + auto Err = Remapper->getRecords(FuncName, Data); + if (Err) { + // If we don't find FuncName, try DeprecatedFuncName to handle profiles + // built by older compilers. + auto Err2 = + handleErrors(std::move(Err), [&](const InstrProfError &IE) -> Error { + if (IE.get() != instrprof_error::unknown_function) + return make_error(IE); + if (auto Err = Remapper->getRecords(DeprecatedFuncName, Data)) + return Err; + return Error::success(); + }); + if (Err2) + return std::move(Err2); + } // Found it. Look for counters with the right hash. 
// A flag to indicate if the records are from the same type @@ -1306,6 +1479,33 @@ Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, return success(); } +Error IndexedInstrProfReader::getFunctionBitmap(StringRef FuncName, + uint64_t FuncHash, + BitVector &Bitmap) { + Expected Record = getInstrProfRecord(FuncName, FuncHash); + if (Error E = Record.takeError()) + return error(std::move(E)); + + const auto &BitmapBytes = Record.get().BitmapBytes; + size_t I = 0, E = BitmapBytes.size(); + Bitmap.resize(E * CHAR_BIT); + BitVector::apply( + [&](auto X) { + using XTy = decltype(X); + alignas(XTy) uint8_t W[sizeof(X)]; + size_t N = std::min(E - I, sizeof(W)); + std::memset(W, 0, sizeof(W)); + std::memcpy(W, &BitmapBytes[I], N); + I += N; + return support::endian::read(W); + }, + Bitmap, Bitmap); + assert(I == E); + + return success(); +} + Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { ArrayRef Data; @@ -1328,8 +1528,11 @@ Error IndexedInstrProfReader::readBinaryIds( } Error IndexedInstrProfReader::printBinaryIds(raw_ostream &OS) { - return printBinaryIdsInternal(OS, *DataBuffer, BinaryIdsSize, BinaryIdsStart, - llvm::support::little); + std::vector BinaryIds; + if (Error E = readBinaryIds(BinaryIds)) + return E; + printBinaryIdsInternal(OS, BinaryIds); + return Error::success(); } void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index b74d5c3862d803dd7462ece6bade459f65813886..b2ec59f174333b4e3b5898f268556b2376c4cae1 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/Error.h" @@ -131,6 
+132,8 @@ public: M += sizeof(uint64_t); // The function hash M += sizeof(uint64_t); // The size of the Counts vector M += ProfRecord.Counts.size() * sizeof(uint64_t); + M += sizeof(uint64_t); // The size of the Bitmap vector + M += ProfRecord.BitmapBytes.size() * sizeof(uint64_t); // Value data M += ValueProfData::getSize(ProfileData.second); @@ -160,6 +163,10 @@ public: for (uint64_t I : ProfRecord.Counts) LE.write(I); + LE.write(ProfRecord.BitmapBytes.size()); + for (uint64_t I : ProfRecord.BitmapBytes) + LE.write(I); + // Write value data std::unique_ptr VDataPtr = ValueProfData::serializeFrom(ProfileData.second); @@ -174,10 +181,12 @@ public: InstrProfWriter::InstrProfWriter(bool Sparse, uint64_t TemporalProfTraceReservoirSize, - uint64_t MaxTemporalProfTraceLength) + uint64_t MaxTemporalProfTraceLength, + bool WritePrevVersion) : Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength), TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize), - InfoObj(new InstrProfRecordWriterTrait()) {} + InfoObj(new InstrProfRecordWriterTrait()), + WritePrevVersion(WritePrevVersion) {} InstrProfWriter::~InstrProfWriter() { delete InfoObj; } @@ -380,6 +389,8 @@ bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) { const InstrProfRecord &IPR = Func.second; if (llvm::any_of(IPR.Counts, [](uint64_t Count) { return Count > 0; })) return true; + if (llvm::any_of(IPR.BitmapBytes, [](uint8_t Byte) { return Byte > 0; })) + return true; } return false; } @@ -447,12 +458,11 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { Header.MemProfOffset = 0; Header.BinaryIdOffset = 0; Header.TemporalProfTracesOffset = 0; - int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t); + Header.VTableNamesOffset = 0; - // Only write out all the fields except 'HashOffset', 'MemProfOffset', - // 'BinaryIdOffset' and `TemporalProfTracesOffset`. We need to remember the - // offset of these fields to allow back patching later. 
- for (int I = 0; I < N - 4; I++) + // Only write out the first four fields. We need to remember the offset of the + // remaining fields to allow back patching later. + for (int I = 0; I < 4; I++) OS.write(reinterpret_cast(&Header)[I]); // Save the location of Header.HashOffset field in \c OS. @@ -476,6 +486,9 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { uint64_t TemporalProfTracesOffset = OS.tell(); OS.write(0); + uint64_t VTableNamesOffset = OS.tell(); + OS.write(0); + // Reserve space to write profile summary data. uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size(); uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries); @@ -589,6 +602,37 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { OS.writeByte(0); } + uint64_t VTableNamesSectionStart = OS.tell(); + + if (!WritePrevVersion) { + std::vector VTableNameStrs; + for (StringRef VTableName : VTableNames.keys()) + VTableNameStrs.push_back(VTableName.str()); + + std::string CompressedVTableNames; + if (!VTableNameStrs.empty()) + if (Error E = collectGlobalObjectNameStrings( + VTableNameStrs, compression::zlib::isAvailable(), + CompressedVTableNames)) + return E; + + const uint64_t CompressedStringLen = CompressedVTableNames.length(); + + // Record the length of compressed string. + OS.write(CompressedStringLen); + + // Write the chars in compressed strings. + for (auto &c : CompressedVTableNames) + OS.writeByte(static_cast(c)); + + // Pad up to a multiple of 8. + // InstrProfReader could read bytes according to 'CompressedStringLen'. 
+ const uint64_t PaddedLength = alignTo(CompressedStringLen, 8); + + for (uint64_t K = CompressedStringLen; K < PaddedLength; K++) + OS.writeByte(0); + } + uint64_t TemporalProfTracesSectionStart = 0; if (static_cast(ProfileKind & InstrProfKind::TemporalProfile)) { TemporalProfTracesSectionStart = OS.tell(); @@ -632,6 +676,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { // Patch the Header.TemporalProfTracesOffset (=0 for profiles without // traces). {TemporalProfTracesOffset, &TemporalProfTracesSectionStart, 1}, + {VTableNamesOffset, &VTableNamesSectionStart, 1}, // Patch the summary data. {SummaryOffset, reinterpret_cast(TheSummary.get()), (int)(SummarySize / sizeof(uint64_t))}, @@ -684,7 +729,8 @@ Error InstrProfWriter::validateRecord(const InstrProfRecord &Func) { std::unique_ptr VD = Func.getValueForSite(VK, S); DenseSet SeenValues; for (uint32_t I = 0; I < ND; I++) - if ((VK != IPVK_IndirectCallTarget) && !SeenValues.insert(VD[I].Value).second) + if ((VK != IPVK_IndirectCallTarget && VK != IPVK_VTableTarget) && + !SeenValues.insert(VD[I].Value).second) return make_error(instrprof_error::invalid_prof); } } @@ -703,6 +749,17 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash, for (uint64_t Count : Func.Counts) OS << Count << "\n"; + if (Func.BitmapBytes.size() > 0) { + OS << "# Num Bitmap Bytes:\n$" << Func.BitmapBytes.size() << "\n"; + OS << "# Bitmap Byte Values:\n"; + for (uint8_t Byte : Func.BitmapBytes) { + OS << "0x"; + OS.write_hex(Byte); + OS << "\n"; + } + OS << "\n"; + } + uint32_t NumValueKinds = Func.getNumValueKinds(); if (!NumValueKinds) { OS << "\n"; @@ -721,8 +778,8 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash, OS << ND << "\n"; std::unique_ptr VD = Func.getValueForSite(VK, S); for (uint32_t I = 0; I < ND; I++) { - if (VK == IPVK_IndirectCallTarget) - OS << Symtab.getFuncNameOrExternalSymbol(VD[I].Value) << ":" + if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget) + OS << 
Symtab.getFuncOrVarNameIfDefined(VD[I].Value) << ":" << VD[I].Count << "\n"; else OS << VD[I].Value << ":" << VD[I].Count << "\n"; @@ -743,6 +800,8 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) { if (static_cast(ProfileKind & InstrProfKind::FunctionEntryInstrumentation)) OS << "# Always instrument the function entry block\n:entry_first\n"; + if (static_cast(ProfileKind & InstrProfKind::SingleByteCoverage)) + OS << "# Instrument block coverage\n:single_byte_coverage\n"; InstrProfSymtab Symtab; using FuncPair = detail::DenseMapPair; @@ -758,6 +817,10 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) { } } + for (const auto &VTableName : VTableNames) + if (Error E = Symtab.addVTableName(VTableName.getKey())) + return E; + if (static_cast(ProfileKind & InstrProfKind::TemporalProfile)) writeTextTemporalProfTraceData(OS, Symtab); @@ -790,7 +853,7 @@ void InstrProfWriter::writeTextTemporalProfTraceData(raw_fd_ostream &OS, for (auto &Trace : TemporalProfTraces) { OS << "# Weight:\n" << Trace.Weight << "\n"; for (auto &NameRef : Trace.FunctionNameRefs) - OS << Symtab.getFuncName(NameRef) << ","; + OS << Symtab.getFuncOrVarName(NameRef) << ","; OS << "\n"; } OS << "\n"; diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index 146fa3e06996c9edd5e3fa8eb72351d5fe73cb90..eef66f9a366423a792bce585c7296bc75b60f76b 100644 --- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -13,13 +13,16 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" #include "llvm/Analysis/IndirectCallVisitor.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" 
+#include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" @@ -39,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +54,12 @@ using namespace llvm; STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions."); STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites."); +extern cl::opt MaxNumVTableAnnotations; + +namespace llvm { +extern cl::opt EnableVTableProfileUse; +} + // Command line option to disable indirect-call promotion with the default as // false. This is for debug purpose. static cl::opt DisableICP("disable-icp", cl::init(false), cl::Hidden, @@ -102,13 +112,205 @@ static cl::opt ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens")); +// Indirect call promotion pass will fall back to function-based comparison if +// vtable-count / function-count is smaller than this threshold. +static cl::opt ICPVTablePercentageThreshold( + "icp-vtable-percentage-threshold", cl::init(0.995), cl::Hidden, + cl::desc("The percentage threshold of vtable-count / function-count for " + "cost-benefit analysis.")); + +// Although comparing vtables can save a vtable load, we may need to compare +// vtable pointer with multiple vtable address points due to class inheritance. +// Comparing with multiple vtables inserts additional instructions on hot code +// path, and doing so for an earlier candidate delays the comparisons for later +// candidates. For the last candidate, only the fallback path is affected. +// We allow multiple vtable comparison for the last function candidate and use +// the option below to cap the number of vtables. 
+static cl::opt ICPMaxNumVTableLastCandidate( + "icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden, + cl::desc("The maximum number of vtable for the last candidate.")); + +static cl::list ICPIgnoredBaseTypes( + "icp-ignored-base-types", cl::Hidden, + cl::desc( + "A list of mangled vtable type info names. Classes specified by the " + "type info names and their derived ones will not be vtable-ICP'ed. " + "Useful when the profiled types and actual types in the optimized " + "binary could be different due to profiling limitations. Type info " + "names are those string literals used in LLVM type metadata")); + namespace { +// The key is a vtable global variable, and the value is a map. +// In the inner map, the key represents address point offsets and the value is a +// constant for this address point. +using VTableAddressPointOffsetValMap = + SmallDenseMap>; + +// A struct to collect type information for a virtual call site. +struct VirtualCallSiteInfo { + // The offset from the address point to virtual function in the vtable. + uint64_t FunctionOffset; + // The instruction that computes the address point of vtable. + Instruction *VPtr; + // The compatible type used in LLVM type intrinsics. + StringRef CompatibleTypeStr; +}; + +// The key is a virtual call, and value is its type information. +using VirtualCallSiteTypeInfoMap = + SmallDenseMap; + +// The key is vtable GUID, and value is its value profile count. +using VTableGUIDCountsMap = SmallDenseMap; + +// Return the address point offset of the given compatible type. +// +// Type metadata of a vtable specifies the types that can contain a pointer to +// this vtable, for example, `Base*` can be a pointer to an derived type +// but not vice versa. 
See also https://llvm.org/docs/TypeMetadata.html +static std::optional +getAddressPointOffset(const GlobalVariable &VTableVar, + StringRef CompatibleType) { + SmallVector Types; + VTableVar.getMetadata(LLVMContext::MD_type, Types); + + for (MDNode *Type : Types) + if (auto *TypeId = dyn_cast(Type->getOperand(1).get()); + TypeId && TypeId->getString() == CompatibleType) + return cast( + cast(Type->getOperand(0))->getValue()) + ->getZExtValue(); + + return std::nullopt; +} + +// Return a constant representing the vtable's address point specified by the +// offset. +static Constant *getVTableAddressPointOffset(GlobalVariable *VTable, + uint32_t AddressPointOffset) { + Module &M = *VTable->getParent(); + LLVMContext &Context = M.getContext(); + assert(AddressPointOffset < + M.getDataLayout().getTypeAllocSize(VTable->getValueType()) && + "Out-of-bound access"); + + return ConstantExpr::getInBoundsGetElementPtr( + Type::getInt8Ty(Context), VTable, + llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset)); +} + +// Return the basic block in which Use `U` is used via its `UserInst`. +static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) { + if (PHINode *PN = dyn_cast(UserInst)) + return PN->getIncomingBlock(U); + + return UserInst->getParent(); +} + +// `DestBB` is a suitable basic block to sink `Inst` into when `Inst` have users +// and all users are in `DestBB`. The caller guarantees that `Inst->getParent()` +// is the sole predecessor of `DestBB` and `DestBB` is dominated by +// `Inst->getParent()`. +static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) { + // 'BB' is used only by assert. 
+ [[maybe_unused]] BasicBlock *BB = Inst->getParent(); + + assert(BB != DestBB && BB->getTerminator()->getNumSuccessors() == 2 && + DestBB->getUniquePredecessor() == BB && + "Guaranteed by ICP transformation"); + + BasicBlock *UserBB = nullptr; + for (Use &Use : Inst->uses()) { + User *User = Use.getUser(); + // Do checked cast since IR verifier guarantees that the user of an + // instruction must be an instruction. See `Verifier::visitInstruction`. + Instruction *UserInst = cast(User); + // We can sink debug or pseudo instructions together with Inst. + if (UserInst->isDebugOrPseudoInst()) + continue; + UserBB = getUserBasicBlock(Use, UserInst); + // Do not sink if Inst is used in a basic block that is not DestBB. + // TODO: Sink to the common dominator of all user blocks. + if (UserBB != DestBB) + return false; + } + return UserBB != nullptr; +} + +// For the virtual call dispatch sequence, try to sink vtable load instructions +// to the cold indirect call fallback. +// FIXME: Move the sink eligibility check below to a utility function in +// Transforms/Utils/ directory. +static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { + if (!isDestBBSuitableForSink(I, DestBlock)) + return false; + + // Do not move control-flow-involving, volatile loads, vaarg, alloca + // instructions, etc. + if (isa(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() || + isa(I)) + return false; + + // Do not sink convergent call instructions. + if (const auto *C = dyn_cast(I)) + if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent()) + return false; + + // Do not move an instruction that may write to memory. + if (I->mayWriteToMemory()) + return false; + + // We can only sink load instructions if there is nothing between the load and + // the end of block that could change the value. + if (I->mayReadFromMemory()) { + // We already know that SrcBlock is the unique predecessor of DestBlock. 
+ for (BasicBlock::iterator Scan = std::next(I->getIterator()), + E = I->getParent()->end(); + Scan != E; ++Scan) { + // Note analysis analysis can tell whether two pointers can point to the + // same object in memory or not thereby find further opportunities to + // sink. + if (Scan->mayWriteToMemory()) + return false; + } + } + + BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); + I->moveBefore(*DestBlock, InsertPos); + + // TODO: Sink debug intrinsic users of I to 'DestBlock'. + // 'InstCombinerImpl::tryToSinkInstructionDbgValues' and + // 'InstCombinerImpl::tryToSinkInstructionDbgVariableRecords' already have + // the core logic to do this. + return true; +} + +// Try to sink instructions after VPtr to the indirect call fallback. +// Return the number of sunk IR instructions. +static int tryToSinkInstructions(BasicBlock *OriginalBB, + BasicBlock *IndirectCallBB) { + int SinkCount = 0; + // Do not sink across a critical edge for simplicity. + if (IndirectCallBB->getUniquePredecessor() != OriginalBB) + return SinkCount; + // Sink all eligible instructions in OriginalBB in reverse order. + for (Instruction &I : + llvm::make_early_inc_range(llvm::drop_begin(llvm::reverse(*OriginalBB)))) + if (tryToSinkInstruction(&I, IndirectCallBB)) + SinkCount++; + + return SinkCount; +} + // Promote indirect calls to conditional direct calls, keeping track of // thresholds. class IndirectCallPromoter { private: Function &F; + Module &M; + + ProfileSummaryInfo *PSI = nullptr; // Symtab that maps indirect call profile values to function names and // defines. @@ -116,13 +318,30 @@ private: const bool SamplePGO; + // A map from a virtual call to its type information. + const VirtualCallSiteTypeInfoMap &VirtualCSInfo; + + VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal; + OptimizationRemarkEmitter &ORE; + const DenseSet &IgnoredBaseTypes; + // A struct that records the direct target and it's call count. 
struct PromotionCandidate { Function *const TargetFunction; const uint64_t Count; + // The following fields only exists for promotion candidates with vtable + // information. + // + // Due to class inheritance, one virtual call candidate can come from + // multiple vtables. `VTableGUIDAndCounts` tracks the vtable GUIDs and + // counts for 'TargetFunction'. `AddressPoints` stores the vtable address + // points for comparison. + VTableGUIDCountsMap VTableGUIDAndCounts; + SmallVector AddressPoints; + PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {} }; @@ -135,16 +354,67 @@ private: const CallBase &CB, const ArrayRef &ValueDataRef, uint64_t TotalCount, uint32_t NumCandidates); - // Promote a list of targets for one indirect-call callsite. Return - // the number of promotions. - uint32_t tryToPromote(CallBase &CB, - const std::vector &Candidates, - uint64_t &TotalCount); + // Promote a list of targets for one indirect-call callsite by comparing + // indirect callee with functions. Return true if there are IR + // transformations and false otherwise. + bool tryToPromoteWithFuncCmp(CallBase &CB, Instruction *VPtr, + ArrayRef Candidates, + uint64_t TotalCount, + ArrayRef ICallProfDataRef, + uint32_t NumCandidates, + VTableGUIDCountsMap &VTableGUIDCounts); + + // Promote a list of targets for one indirect call by comparing vtables with + // functions. Return true if there are IR transformations and false + // otherwise. + bool tryToPromoteWithVTableCmp( + CallBase &CB, Instruction *VPtr, + const std::vector &Candidates, + uint64_t TotalFuncCount, uint32_t NumCandidates, + MutableArrayRef ICallProfDataRef, + VTableGUIDCountsMap &VTableGUIDCounts); + + // Return true if it's profitable to compare vtables for the callsite. + bool isProfitableToCompareVTables( + const CallBase &CB, const std::vector &Candidates, + uint64_t TotalCount); + + // Return true if the vtable corresponding to VTableGUID should be skipped + // for vtable-based comparison. 
+ bool shouldSkipVTable(uint64_t VTableGUID); + + // Given an indirect callsite and the list of function candidates, compute + // the following vtable information in output parameters and return vtable + // pointer if type profiles exist. + // - Populate `VTableGUIDCounts` with using !prof + // metadata attached on the vtable pointer. + // - For each function candidate, finds out the vtables from which it gets + // called and stores the in promotion candidate. + Instruction *computeVTableInfos(const CallBase *CB, + VTableGUIDCountsMap &VTableGUIDCounts, + std::vector &Candidates); + + Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV, + uint64_t AddressPointOffset); + + void updateFuncValueProfiles(CallBase &CB, ArrayRef VDs, + uint64_t Sum, uint32_t MaxMDCount); + + void updateVPtrValueProfiles(Instruction *VPtr, + VTableGUIDCountsMap &VTableGUIDCounts); public: - IndirectCallPromoter(Function &Func, InstrProfSymtab *Symtab, bool SamplePGO, - OptimizationRemarkEmitter &ORE) - : F(Func), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {} + IndirectCallPromoter( + Function &Func, Module &M, ProfileSummaryInfo *PSI, + InstrProfSymtab *Symtab, bool SamplePGO, + const VirtualCallSiteTypeInfoMap &VirtualCSInfo, + VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal, + const DenseSet &IgnoredBaseTypes, + OptimizationRemarkEmitter &ORE) + : F(Func), M(M), PSI(PSI), Symtab(Symtab), SamplePGO(SamplePGO), + VirtualCSInfo(VirtualCSInfo), + VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE), + IgnoredBaseTypes(IgnoredBaseTypes) {} IndirectCallPromoter(const IndirectCallPromoter &) = delete; IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete; @@ -240,20 +510,123 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite( return Ret; } +Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar( + GlobalVariable *GV, uint64_t AddressPointOffset) { + auto [Iter, Inserted] = + 
VTableAddressPointOffsetVal[GV].try_emplace(AddressPointOffset, nullptr); + if (Inserted) + Iter->second = getVTableAddressPointOffset(GV, AddressPointOffset); + return Iter->second; +} + +Instruction *IndirectCallPromoter::computeVTableInfos( + const CallBase *CB, VTableGUIDCountsMap &GUIDCountsMap, + std::vector &Candidates) { + if (!EnableVTableProfileUse) + return nullptr; + + // Take the following code sequence as an example, here is how the code works + // @vtable1 = {[n x ptr] [... ptr @func1]} + // @vtable2 = {[m x ptr] [... ptr @func2]} + // + // %vptr = load ptr, ptr %d, !prof !0 + // %0 = tail call i1 @llvm.type.test(ptr %vptr, metadata !"vtable1") + // tail call void @llvm.assume(i1 %0) + // %vfn = getelementptr inbounds ptr, ptr %vptr, i64 1 + // %1 = load ptr, ptr %vfn + // call void %1(ptr %d), !prof !1 + // + // !0 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50} + // !1 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50} + // + // Step 1. Find out the %vptr instruction for indirect call and use its !prof + // to populate `GUIDCountsMap`. + // Step 2. For each vtable-guid, look up its definition from symtab. LTO can + // make vtable definitions visible across modules. + // Step 3. Compute the byte offset of the virtual call, by adding vtable + // address point offset and function's offset relative to vtable address + // point. For each function candidate, this step tells us the vtable from + // which it comes from, and the vtable address point to compare %vptr with. + + // Only virtual calls have virtual call site info. 
+ auto Iter = VirtualCSInfo.find(CB); + if (Iter == VirtualCSInfo.end()) + return nullptr; + + LLVM_DEBUG(dbgs() << "\nComputing vtable infos for callsite #" + << NumOfPGOICallsites << "\n"); + + const auto &VirtualCallInfo = Iter->second; + Instruction *VPtr = VirtualCallInfo.VPtr; + + SmallDenseMap CalleeIndexMap; + for (size_t I = 0; I < Candidates.size(); I++) + CalleeIndexMap[Candidates[I].TargetFunction] = I; + + uint32_t ActualNumValueData = 0; + uint64_t TotalVTableCount = 0; + auto VTableValueDataArray = getValueProfDataFromInst( + *VirtualCallInfo.VPtr, IPVK_VTableTarget, MaxNumVTableAnnotations, + ActualNumValueData, TotalVTableCount); + if (VTableValueDataArray.get() == nullptr) + return VPtr; + + // Compute the functions and counts from by each vtable. + for (size_t j = 0; j < ActualNumValueData; j++) { + uint64_t VTableVal = VTableValueDataArray[j].Value; + GUIDCountsMap[VTableVal] = VTableValueDataArray[j].Count; + GlobalVariable *VTableVar = Symtab->getGlobalVariable(VTableVal); + if (!VTableVar) { + LLVM_DEBUG(dbgs() << " Cannot find vtable definition for " << VTableVal + << "; maybe the vtable isn't imported\n"); + continue; + } + + std::optional MaybeAddressPointOffset = + getAddressPointOffset(*VTableVar, VirtualCallInfo.CompatibleTypeStr); + if (!MaybeAddressPointOffset) + continue; + + const uint64_t AddressPointOffset = *MaybeAddressPointOffset; + + Function *Callee = nullptr; + std::tie(Callee, std::ignore) = getFunctionAtVTableOffset( + VTableVar, AddressPointOffset + VirtualCallInfo.FunctionOffset, M); + if (!Callee) + continue; + auto CalleeIndexIter = CalleeIndexMap.find(Callee); + if (CalleeIndexIter == CalleeIndexMap.end()) + continue; + + auto &Candidate = Candidates[CalleeIndexIter->second]; + // There shouldn't be duplicate GUIDs in one !prof metadata (except + // duplicated zeros), so assign counters directly won't cause overwrite or + // counter loss. 
+ Candidate.VTableGUIDAndCounts[VTableVal] = VTableValueDataArray[j].Count; + Candidate.AddressPoints.push_back( + getOrCreateVTableAddressPointVar(VTableVar, AddressPointOffset)); + } + + return VPtr; +} + +// Creates 'branch_weights' prof metadata using TrueWeight and FalseWeight. +// Scales uint64_t counters down to uint32_t if necessary to prevent overflow. +static MDNode *createBranchWeights(LLVMContext &Context, uint64_t TrueWeight, + uint64_t FalseWeight) { + MDBuilder MDB(Context); + uint64_t Scale = calculateCountScale(std::max(TrueWeight, FalseWeight)); + return MDB.createBranchWeights(scaleBranchCount(TrueWeight, Scale), + scaleBranchCount(FalseWeight, Scale)); +} + CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE) { - - uint64_t ElseCount = TotalCount - Count; - uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount); - uint64_t Scale = calculateCountScale(MaxCount); - MDBuilder MDB(CB.getContext()); - MDNode *BranchWeights = MDB.createBranchWeights( - scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale)); - - CallBase &NewInst = - promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights); + CallBase &NewInst = promoteCallWithIfThenElse( + CB, DirectCallee, + createBranchWeights(CB.getContext(), Count, TotalCount - Count)); if (AttachProfToDirectCall) { MDBuilder MDB(NewInst.getContext()); @@ -275,21 +648,177 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee, } // Promote indirect-call to conditional direct-call for one callsite. 
-uint32_t IndirectCallPromoter::tryToPromote( - CallBase &CB, const std::vector &Candidates, - uint64_t &TotalCount) { +bool IndirectCallPromoter::tryToPromoteWithFuncCmp( + CallBase &CB, Instruction *VPtr, ArrayRef Candidates, + uint64_t TotalCount, ArrayRef ICallProfDataRef, + uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) { uint32_t NumPromoted = 0; for (const auto &C : Candidates) { - uint64_t Count = C.Count; - pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO, - &ORE); - assert(TotalCount >= Count); - TotalCount -= Count; + uint64_t FuncCount = C.Count; + pgo::promoteIndirectCall(CB, C.TargetFunction, FuncCount, TotalCount, + SamplePGO, &ORE); + assert(TotalCount >= FuncCount); + TotalCount -= FuncCount; NumOfPGOICallPromotion++; NumPromoted++; + + if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty()) + continue; + + // After a virtual call candidate gets promoted, update the vtable's counts + // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents + // a vtable from which the virtual call is loaded. Compute the sum and use + // 128-bit APInt to improve accuracy. + uint64_t SumVTableCount = 0; + for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) + SumVTableCount += VTableCount; + + for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) { + APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/); + APFuncCount *= VTableCount; + VTableGUIDCounts[GUID] -= APFuncCount.udiv(SumVTableCount).getZExtValue(); + } } - return NumPromoted; + if (NumPromoted == 0) + return false; + + assert(NumPromoted <= ICallProfDataRef.size() && + "Number of promoted functions should not be greater than the number " + "of values in profile metadata"); + + // Update value profiles on the indirect call. 
+ updateFuncValueProfiles(CB, ICallProfDataRef.slice(NumPromoted), TotalCount, + NumCandidates); + updateVPtrValueProfiles(VPtr, VTableGUIDCounts); + return true; +} + +void IndirectCallPromoter::updateFuncValueProfiles( + CallBase &CB, ArrayRef CallVDs, uint64_t TotalCount, + uint32_t MaxMDCount) { + // First clear the existing !prof. + CB.setMetadata(LLVMContext::MD_prof, nullptr); + // Annotate the remaining value profiles if counter is not zero. + if (TotalCount != 0) + annotateValueSite(M, CB, CallVDs, TotalCount, IPVK_IndirectCallTarget, + MaxMDCount); +} + +void IndirectCallPromoter::updateVPtrValueProfiles( + Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) { + if (!EnableVTableProfileUse || VPtr == nullptr || + !VPtr->getMetadata(LLVMContext::MD_prof)) + return; + VPtr->setMetadata(LLVMContext::MD_prof, nullptr); + std::vector VTableValueProfiles; + uint64_t TotalVTableCount = 0; + for (auto [GUID, Count] : VTableGUIDCounts) { + if (Count == 0) + continue; + + VTableValueProfiles.push_back({GUID, Count}); + TotalVTableCount += Count; + } + llvm::sort(VTableValueProfiles, + [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) { + return LHS.Count > RHS.Count; + }); + + annotateValueSite(M, *VPtr, VTableValueProfiles, TotalVTableCount, + IPVK_VTableTarget, VTableValueProfiles.size()); +} + +bool IndirectCallPromoter::tryToPromoteWithVTableCmp( + CallBase &CB, Instruction *VPtr, + const std::vector &Candidates, uint64_t TotalFuncCount, + uint32_t NumCandidates, + MutableArrayRef ICallProfDataRef, + VTableGUIDCountsMap &VTableGUIDCounts) { + SmallVector PromotedFuncCount; + + for (const auto &Candidate : Candidates) { + for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts) + VTableGUIDCounts[GUID] -= Count; + + // 'OriginalBB' is the basic block of indirect call. After each candidate + // is promoted, a new basic block is created for the indirect fallback basic + // block and indirect call `CB` is moved into this new BB. 
+ BasicBlock *OriginalBB = CB.getParent(); + promoteCallWithVTableCmp( + CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints, + createBranchWeights(CB.getContext(), Candidate.Count, + TotalFuncCount - Candidate.Count)); + + int SinkCount = tryToSinkInstructions(OriginalBB, CB.getParent()); + + ORE.emit([&]() { + OptimizationRemark Remark(DEBUG_TYPE, "Promoted", &CB); + + const auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts; + Remark << "Promote indirect call to " + << ore::NV("DirectCallee", Candidate.TargetFunction) + << " with count " << ore::NV("Count", Candidate.Count) + << " out of " << ore::NV("TotalCount", TotalFuncCount) << ", sink " + << ore::NV("SinkCount", SinkCount) + << " instruction(s) and compare " + << ore::NV("VTable", VTableGUIDAndCounts.size()) + << " vtable(s): {"; + + // Sort GUIDs so remark message is deterministic. + std::set GUIDSet; + for (auto [GUID, Count] : VTableGUIDAndCounts) + GUIDSet.insert(GUID); + for (auto Iter = GUIDSet.begin(); Iter != GUIDSet.end(); Iter++) { + if (Iter != GUIDSet.begin()) + Remark << ", "; + Remark << ore::NV("VTable", Symtab->getGlobalVariable(*Iter)); + } + + Remark << "}"; + + return Remark; + }); + + PromotedFuncCount.push_back(Candidate.Count); + + assert(TotalFuncCount >= Candidate.Count && + "Within one prof metadata, total count is the sum of counts from " + "individual pairs"); + // Use std::min since 'TotalFuncCount' is the saturated sum of individual + // counts, see + // https://github.com/llvm/llvm-project/blob/abedb3b8356d5d56f1c575c4f7682fba2cb19787/llvm/lib/ProfileData/InstrProf.cpp#L1281-L1288 + TotalFuncCount -= std::min(TotalFuncCount, Candidate.Count); + NumOfPGOICallPromotion++; + } + + if (PromotedFuncCount.empty()) + return false; + + // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has a + // a distinct 'VPtr'. + // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be + // used to load multiple virtual functions. 
The vtable profiles needs to be + // updated properly in that case (e.g, for each indirect call annotate both + // type profiles and function profiles in one !prof). + for (size_t I = 0; I < PromotedFuncCount.size(); I++) + ICallProfDataRef[I].Count -= + std::max(PromotedFuncCount[I], ICallProfDataRef[I].Count); + // Sort value profiles by count in descending order. + llvm::stable_sort(ICallProfDataRef, [](const InstrProfValueData &LHS, + const InstrProfValueData &RHS) { + return LHS.Count > RHS.Count; + }); + // Drop the pair if count is zero. + ArrayRef VDs( + ICallProfDataRef.begin(), + llvm::upper_bound(ICallProfDataRef, 0U, + [](uint64_t Count, const InstrProfValueData &ProfData) { + return ProfData.Count <= Count; + })); + updateFuncValueProfiles(CB, VDs, TotalFuncCount, NumCandidates); + updateVPtrValueProfiles(VPtr, VTableGUIDCounts); + return true; } // Traverse all the indirect-call callsite and get the value profile @@ -298,32 +827,181 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) { bool Changed = false; ICallPromotionAnalysis ICallAnalysis; for (auto *CB : findIndirectCalls(F)) { - uint32_t NumVals, NumCandidates; + uint32_t NumCandidates; uint64_t TotalCount; auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction( - CB, NumVals, TotalCount, NumCandidates); + CB, TotalCount, NumCandidates); if (!NumCandidates || (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount, /*isForICP=*/true))) continue; + auto PromotionCandidates = getPromotionCandidatesForCallSite( *CB, ICallProfDataRef, TotalCount, NumCandidates); - uint32_t NumPromoted = tryToPromote(*CB, PromotionCandidates, TotalCount); - if (NumPromoted == 0) - continue; - Changed = true; - // Adjust the MD.prof metadata. First delete the old one. - CB->setMetadata(LLVMContext::MD_prof, nullptr); - // If all promoted, we don't need the MD.prof metadata. 
- if (TotalCount == 0 || NumPromoted == NumVals) - continue; - // Otherwise we need update with the un-promoted records back. - annotateValueSite(*F.getParent(), *CB, ICallProfDataRef.slice(NumPromoted), - TotalCount, IPVK_IndirectCallTarget, NumCandidates); + VTableGUIDCountsMap VTableGUIDCounts; + Instruction *VPtr = + computeVTableInfos(CB, VTableGUIDCounts, PromotionCandidates); + + if (isProfitableToCompareVTables(*CB, PromotionCandidates, TotalCount)) + Changed |= tryToPromoteWithVTableCmp(*CB, VPtr, PromotionCandidates, + TotalCount, NumCandidates, + ICallProfDataRef, VTableGUIDCounts); + else + Changed |= tryToPromoteWithFuncCmp(*CB, VPtr, PromotionCandidates, + TotalCount, ICallProfDataRef, + NumCandidates, VTableGUIDCounts); } return Changed; } +// TODO: Return false if the function addressing and vtable load instructions +// cannot sink to indirect fallback. +bool IndirectCallPromoter::isProfitableToCompareVTables( + const CallBase &CB, const std::vector &Candidates, + uint64_t TotalCount) { + if (!EnableVTableProfileUse || Candidates.empty()) + return false; + LLVM_DEBUG(dbgs() << "\nEvaluating vtable profitability for callsite #" + << NumOfPGOICallsites << CB << "\n"); + uint64_t RemainingVTableCount = TotalCount; + const size_t CandidateSize = Candidates.size(); + for (size_t I = 0; I < CandidateSize; I++) { + auto &Candidate = Candidates[I]; + auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts; + + LLVM_DEBUG(dbgs() << " Candidate " << I << " FunctionCount: " + << Candidate.Count << ", VTableCounts:"); + // Add [[maybe_unused]] since are only used by LLVM_DEBUG. 
+ for ([[maybe_unused]] auto &[GUID, Count] : VTableGUIDAndCounts) + LLVM_DEBUG(dbgs() << " {" << Symtab->getGlobalVariable(GUID)->getName() + << ", " << Count << "}"); + LLVM_DEBUG(dbgs() << "\n"); + + uint64_t CandidateVTableCount = 0; + + for (auto &[GUID, Count] : VTableGUIDAndCounts) { + CandidateVTableCount += Count; + + if (shouldSkipVTable(GUID)) + return false; + } + + if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) { + LLVM_DEBUG( + dbgs() << " function count " << Candidate.Count + << " and its vtable sum count " << CandidateVTableCount + << " have discrepancies. Bail out vtable comparison.\n"); + return false; + } + + RemainingVTableCount -= Candidate.Count; + + // 'MaxNumVTable' limits the number of vtables to make vtable comparison + // profitable. Comparing multiple vtables for one function candidate will + // insert additional instructions on the hot path, and allowing more than + // one vtable for non last candidates may or may not elongate the dependency + // chain for the subsequent candidates. Set its value to 1 for non-last + // candidate and allow option to override it for the last candidate. + int MaxNumVTable = 1; + if (I == CandidateSize - 1) + MaxNumVTable = ICPMaxNumVTableLastCandidate; + + if ((int)Candidate.AddressPoints.size() > MaxNumVTable) { + LLVM_DEBUG(dbgs() << " allow at most " << MaxNumVTable << " and got " + << Candidate.AddressPoints.size() + << " vtables. 
Bail out for vtable comparison.\n"); + return false; + } + } + + return true; +} + +bool IndirectCallPromoter::shouldSkipVTable(uint64_t VTableGUID) { + if (IgnoredBaseTypes.empty()) + return false; + + auto *VTableVar = Symtab->getGlobalVariable(VTableGUID); + + assert(VTableVar && "VTableVar must exist for GUID in VTableGUIDAndCounts"); + + SmallVector Types; + VTableVar->getMetadata(LLVMContext::MD_type, Types); + + for (auto *Type : Types) + if (auto *TypeId = dyn_cast(Type->getOperand(1).get())) + if (IgnoredBaseTypes.contains(TypeId->getString())) { + LLVM_DEBUG(dbgs() << " vtable profiles should be ignored. Bail " + "out of vtable comparison."); + return true; + } + return false; +} + +// For virtual calls in the module, collect per-callsite information which will +// be used to associate an ICP candidate with a vtable and a specific function +// in the vtable. With type intrinsics (llvm.type.test), we can find virtual +// calls in a compile-time efficient manner (by iterating its users) and more +// importantly use the compatible type later to figure out the function byte +// offset relative to the start of vtables. +static void +computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM, + VirtualCallSiteTypeInfoMap &VirtualCSInfo) { + // Right now only llvm.type.test is used to find out virtual call sites. + // With ThinLTO and whole-program-devirtualization, llvm.type.test and + // llvm.public.type.test are emitted, and llvm.public.type.test is either + // refined to llvm.type.test or dropped before indirect-call-promotion pass. + // + // FIXME: For fullLTO with VFE, `llvm.type.checked.load intrinsic` is emitted. + // Find out virtual calls by looking at users of llvm.type.checked.load in + // that case. 
+ Function *TypeTestFunc = + M.getFunction(Intrinsic::getName(Intrinsic::type_test)); + + if (!TypeTestFunc || TypeTestFunc->use_empty()) + TypeTestFunc = + M.getFunction(Intrinsic::getName(Intrinsic::public_type_test)); + + if (!TypeTestFunc || TypeTestFunc->use_empty()) + return; + + auto &FAM = MAM.getResult(M).getManager(); + auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & { + return FAM.getResult(F); + }; + // Iterate all type.test calls to find all indirect calls. + for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) { + auto *CI = dyn_cast(U.getUser()); + if (!CI) + continue; + auto *TypeMDVal = cast(CI->getArgOperand(1)); + if (!TypeMDVal) + continue; + auto *CompatibleTypeId = dyn_cast(TypeMDVal->getMetadata()); + if (!CompatibleTypeId) + continue; + + // Find out all devirtualizable call sites given a llvm.type.test + // intrinsic call. + SmallVector DevirtCalls; + SmallVector Assumes; + auto &DT = LookupDomTree(*CI->getFunction()); + findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT); + + for (auto &DevirtCall : DevirtCalls) { + CallBase &CB = DevirtCall.CB; + // Given an indirect call, try find the instruction which loads a + // pointer to virtual table. + Instruction *VTablePtr = + PGOIndirectCallVisitor::tryGetVTableInstruction(&CB); + if (!VTablePtr) + continue; + VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr, + CompatibleTypeId->getString()}; + } + } +} + // A wrapper function that does the actual work. 
static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, bool SamplePGO, ModuleAnalysisManager &MAM) { @@ -336,6 +1014,26 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, return false; } bool Changed = false; + VirtualCallSiteTypeInfoMap VirtualCSInfo; + + DenseSet IgnoredBaseTypes; + + if (EnableVTableProfileUse) { + computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo); + + for (StringRef Str : ICPIgnoredBaseTypes) + IgnoredBaseTypes.insert(Str); + } + + // VTableAddressPointOffsetVal stores the vtable address points. The vtable + // address point of a given (vtable, address point offset) pair is static (doesn't + // change after being computed once). + // IndirectCallPromoter::getOrCreateVTableAddressPointVar creates the map + // entry the first time a (vtable, address point offset) pair is seen, as + // promoteIndirectCalls processes an IR module and calls IndirectCallPromoter + // repeatedly on each function. + VTableAddressPointOffsetValMap VTableAddressPointOffsetVal; + for (auto &F : M) { if (F.isDeclaration() || F.hasOptNone()) continue; @@ -344,7 +1042,9 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, MAM.getResult(M).getManager(); auto &ORE = FAM.getResult(F); - IndirectCallPromoter CallPromoter(F, &Symtab, SamplePGO, ORE); + IndirectCallPromoter CallPromoter( + F, M, PSI, &Symtab, SamplePGO, VirtualCSInfo, + VTableAddressPointOffsetVal, IgnoredBaseTypes, ORE); bool FuncChanged = CallPromoter.processFunction(PSI); if (ICPDUMPAFTER && FuncChanged) { LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs())); diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 343554241da36c190a28593e9f74952e8811584e..33742f5241ace2e597c068a7cce92b3d0a6765fe 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -59,10 +59,27 @@ using namespace llvm; #define DEBUG_TYPE "instrprof" 
namespace llvm { -cl::opt - DebugInfoCorrelate("debug-info-correlate", - cl::desc("Use debug info to correlate profiles."), - cl::init(false)); +// Command line option to enable vtable value profiling. Defined in +// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling= +extern cl::opt EnableVTableValueProfiling; +// TODO: Remove -debug-info-correlate in next LLVM release, in favor of +// -profile-correlate=debug-info. +cl::opt DebugInfoCorrelate( + "debug-info-correlate", + cl::desc("Use debug info to correlate profiles. (Deprecated, use " + "-profile-correlate=debug-info)"), + cl::init(false)); + +cl::opt ProfileCorrelate( + "profile-correlate", + cl::desc("Use debug info or binary file to correlate profiles."), + cl::init(InstrProfCorrelator::NONE), + cl::values(clEnumValN(InstrProfCorrelator::NONE, "", + "No profile correlation"), + clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info", + "Use debug info to correlate"), + clEnumValN(InstrProfCorrelator::BINARY, "binary", + "Use binary to correlate"))); } // namespace llvm namespace { @@ -147,6 +164,8 @@ cl::opt SkipRetExitBlock( "skip-ret-exit-block", cl::init(true), cl::desc("Suppress counter promotion if exit blocks contain ret.")); +using LoadStorePair = std::pair; + /// /// A helper class to promote one counter RMW operation in the loop /// into register update. @@ -430,6 +449,15 @@ bool InstrProfiling::lowerIntrinsics(Function *F) { } else if (auto *IPVP = dyn_cast(&Instr)) { lowerValueProfileInst(IPVP); MadeChange = true; + } else if (auto *IPMP = dyn_cast(&Instr)) { + IPMP->eraseFromParent(); + MadeChange = true; + } else if (auto *IPBU = dyn_cast(&Instr)) { + lowerMCDCTestVectorBitmapUpdate(IPBU); + MadeChange = true; + } else if (auto *IPTU = dyn_cast(&Instr)) { + lowerMCDCCondBitmapUpdate(IPTU); + MadeChange = true; } } } @@ -544,21 +572,41 @@ bool InstrProfiling::run( // the instrumented function. 
This is counting the number of instrumented // target value sites to enter it as field in the profile data variable. for (Function &F : M) { - InstrProfInstBase *FirstProfInst = nullptr; - for (BasicBlock &BB : F) - for (auto I = BB.begin(), E = BB.end(); I != E; I++) + InstrProfCntrInstBase *FirstProfInst = nullptr; + InstrProfMCDCBitmapParameters *FirstProfMCDCParams = nullptr; + for (BasicBlock &BB : F) { + for (auto I = BB.begin(), E = BB.end(); I != E; I++) { if (auto *Ind = dyn_cast(I)) computeNumValueSiteCounts(Ind); - else if (FirstProfInst == nullptr && - (isa(I) || isa(I))) - FirstProfInst = dyn_cast(I); + else { + if (FirstProfInst == nullptr && + (isa(I) || isa(I))) + FirstProfInst = dyn_cast(I); + if (FirstProfMCDCParams == nullptr) + FirstProfMCDCParams = dyn_cast(I); + } + } + } - // Value profiling intrinsic lowering requires per-function profile data - // variable to be created first. - if (FirstProfInst != nullptr) + // If the MCDCBitmapParameters intrinsic was seen, create the bitmaps. + if (FirstProfMCDCParams != nullptr) { + static_cast(getOrCreateRegionBitmaps(FirstProfMCDCParams)); + } + + // Use a profile intrinsic to create the region counters and data variable. + // Also create the data variable based on the MCDCParams. + if (FirstProfInst != nullptr) { static_cast(getOrCreateRegionCounters(FirstProfInst)); + createDataVariable(FirstProfInst, FirstProfMCDCParams); + } } + if (EnableVTableValueProfiling) + for (GlobalVariable &GV : M.globals()) + // Global variables with type metadata are virtual table variables. 
+ if (GV.hasMetadata(LLVMContext::MD_type)) + getOrCreateVTableProfData(&GV); + for (Function &F : M) MadeChange |= lowerIntrinsics(&F); @@ -572,6 +620,7 @@ bool InstrProfiling::run( emitVNodes(); emitNameData(); + emitVTableNames(); // Emit runtime hook for the cases where the target does not unconditionally // require pulling in profile runtime, and coverage is enabled on code that is @@ -625,7 +674,7 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { // in lightweight mode. We need to move the value profile pointer to the // Counter struct to get this working. assert( - !DebugInfoCorrelate && + !DebugInfoCorrelate && ProfileCorrelate == InstrProfCorrelator::NONE && "Value profiling is not yet supported with lightweight instrumentation"); GlobalVariable *Name = Ind->getName(); auto It = ProfileDataMap.find(Name); @@ -670,7 +719,7 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { Ind->eraseFromParent(); } -Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) { +Value *InstrProfiling::getCounterAddress(InstrProfCntrInstBase *I) { auto *Counters = getOrCreateRegionCounters(I); IRBuilder<> Builder(I); @@ -710,6 +759,25 @@ Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) { return Builder.CreateIntToPtr(Add, Addr->getType()); } +Value *InstrProfiling::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) { + auto *Bitmaps = getOrCreateRegionBitmaps(I); + IRBuilder<> Builder(I); + + auto *Addr = Builder.CreateConstInBoundsGEP2_32( + Bitmaps->getValueType(), Bitmaps, 0, I->getBitmapIndex()->getZExtValue()); + + if (isRuntimeCounterRelocationEnabled()) { + LLVMContext &Ctx = M->getContext(); + Ctx.diagnose(DiagnosticInfoPGOProfile( + M->getName().data(), + Twine("Runtime counter relocation is presently not supported for MC/DC " + "bitmaps."), + DS_Warning)); + } + + return Addr; +} + void InstrProfiling::lowerCover(InstrProfCoverInst *CoverInstruction) { auto *Addr = 
getCounterAddress(CoverInstruction); IRBuilder<> Builder(CoverInstruction); @@ -769,6 +837,86 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) { CoverageNamesVar->eraseFromParent(); } +void InstrProfiling::lowerMCDCTestVectorBitmapUpdate( + InstrProfMCDCTVBitmapUpdate *Update) { + IRBuilder<> Builder(Update); + auto *Int8Ty = Type::getInt8Ty(M->getContext()); + auto *Int8PtrTy = Type::getInt8PtrTy(M->getContext()); + auto *Int32Ty = Type::getInt32Ty(M->getContext()); + auto *Int64Ty = Type::getInt64Ty(M->getContext()); + auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr(); + auto *BitmapAddr = getBitmapAddress(Update); + + // Load Temp Val. + // %mcdc.temp = load i32, ptr %mcdc.addr, align 4 + auto *Temp = Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"); + + // Calculate byte offset using div8. + // %1 = lshr i32 %mcdc.temp, 3 + auto *BitmapByteOffset = Builder.CreateLShr(Temp, 0x3); + + // Add byte offset to section base byte address. + // %2 = zext i32 %1 to i64 + // %3 = add i64 ptrtoint (ptr @__profbm_test to i64), %2 + auto *BitmapByteAddr = + Builder.CreateAdd(Builder.CreatePtrToInt(BitmapAddr, Int64Ty), + Builder.CreateZExtOrBitCast(BitmapByteOffset, Int64Ty)); + + // Convert to a pointer. + // %4 = inttoptr i64 %3 to ptr + BitmapByteAddr = Builder.CreateIntToPtr(BitmapByteAddr, Int8PtrTy); + + // Calculate bit offset into bitmap byte by using div8 remainder (AND 7) + // %5 = and i32 %mcdc.temp, 7 + // %6 = trunc i32 %5 to i8 + auto *BitToSet = Builder.CreateTrunc(Builder.CreateAnd(Temp, 0x7), Int8Ty); + + // Shift bit offset left to form a bitmap. + // %7 = shl i8 1, %6 + auto *ShiftedVal = Builder.CreateShl(Builder.getInt8(0x1), BitToSet); + + // Load profile bitmap byte. + // %mcdc.bits = load i8, ptr %4, align 1 + auto *Bitmap = Builder.CreateLoad(Int8Ty, BitmapByteAddr, "mcdc.bits"); + + // Perform logical OR of profile bitmap byte and shifted bit offset. 
+ // %8 = or i8 %mcdc.bits, %7 + auto *Result = Builder.CreateOr(Bitmap, ShiftedVal); + + // Store the updated profile bitmap byte. + // store i8 %8, ptr %4, align 1 + Builder.CreateStore(Result, BitmapByteAddr); + Update->eraseFromParent(); +} + +void InstrProfiling::lowerMCDCCondBitmapUpdate( + InstrProfMCDCCondBitmapUpdate *Update) { + IRBuilder<> Builder(Update); + auto *Int32Ty = Type::getInt32Ty(M->getContext()); + auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr(); + + // Load the MCDC temporary value from the stack. + // %mcdc.temp = load i32, ptr %mcdc.addr, align 4 + auto *Temp = Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"); + + // Zero-extend the evaluated condition boolean value (0 or 1) to 32 bits. + // %1 = zext i1 %tobool to i32 + auto *CondV_32 = Builder.CreateZExt(Update->getCondBool(), Int32Ty); + + // Shift the boolean value left (by the condition's ID) to form a bitmap. + // %2 = shl i32 %1, Update->getCondID() + auto *ShiftedVal = Builder.CreateShl(CondV_32, Update->getCondID()); + + // Perform logical OR of the bitmap against the loaded MCDC temporary value. + // %3 = or i32 %mcdc.temp, %2 + auto *Result = Builder.CreateOr(Temp, ShiftedVal); + + // Store the updated temporary value back to the stack. + // store i32 %3, ptr %mcdc.addr, align 4 + Builder.CreateStore(Result, MCDCCondBitmapAddr); + Update->eraseFromParent(); +} + /// Get the name of a profiling variable for a particular function. 
static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix, bool &Renamed) { @@ -924,37 +1072,133 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) { return true; } -GlobalVariable * -InstrProfiling::createRegionCounters(InstrProfInstBase *Inc, StringRef Name, - GlobalValue::LinkageTypes Linkage) { - uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); - auto &Ctx = M->getContext(); - GlobalVariable *GV; - if (isa(Inc)) { - auto *CounterTy = Type::getInt8Ty(Ctx); - auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters); - // TODO: `Constant::getAllOnesValue()` does not yet accept an array type. - std::vector InitialValues(NumCounters, - Constant::getAllOnesValue(CounterTy)); - GV = new GlobalVariable(*M, CounterArrTy, false, Linkage, - ConstantArray::get(CounterArrTy, InitialValues), - Name); - GV->setAlignment(Align(1)); - } else { - auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters); - GV = new GlobalVariable(*M, CounterTy, false, Linkage, - Constant::getNullValue(CounterTy), Name); - GV->setAlignment(Align(8)); +void InstrProfiling::maybeSetComdat(GlobalVariable *GV, GlobalObject *GO, + StringRef VarName) { + // Place lowered global variables in a comdat group if the associated function + // or global variable is a COMDAT. This will make sure that only one copy of + // global variable (e.g. function counters) of the COMDAT function will be + // emitted after linking. + bool NeedComdat = needsComdatForCounter(*GO, *M); + bool UseComdat = (NeedComdat || TT.isOSBinFormatELF()); + const bool DataReferencedByCode = profDataReferencedByCode(*M); + + if (!UseComdat) + return; + + StringRef GroupName = + TT.isOSBinFormatCOFF() && DataReferencedByCode ? 
GV->getName() : VarName; + Comdat *C = M->getOrInsertComdat(GroupName); + if (!NeedComdat) + C->setSelectionKind(Comdat::NoDeduplicate); + GV->setComdat(C); + // COFF doesn't allow the comdat group leader to have private linkage, so + // upgrade private linkage to internal linkage to produce a symbol table + // entry. + if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage()) + GV->setLinkage(GlobalValue::InternalLinkage); +} + +static inline bool shouldRecordVTableAddr(GlobalVariable *GV) { + if (!profDataReferencedByCode(*GV->getParent())) + return false; + + if (!GV->hasLinkOnceLinkage() && !GV->hasLocalLinkage() && + !GV->hasAvailableExternallyLinkage()) + return true; + + // This avoids the profile data from referencing internal symbols in + // COMDAT. + if (GV->hasLocalLinkage() && GV->hasComdat()) + return false; + + return true; +} + +// FIXME: Introduce an internal alias like what's done for functions to reduce +// the number of relocation entries. +static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) { + auto *Int8PtrTy = PointerType::getUnqual(GV->getContext()); + + // Store a nullptr in __profvt_ if a real address shouldn't be used. + if (!shouldRecordVTableAddr(GV)) + return ConstantPointerNull::get(Int8PtrTy); + + return ConstantExpr::getBitCast(GV, Int8PtrTy); +} + +void InstrProfiling::getOrCreateVTableProfData(GlobalVariable *GV) { + assert(!DebugInfoCorrelate && + "Value profiling is not supported with lightweight instrumentation"); + if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) + return; + + // Skip llvm internal global variable or __prof variables. 
+ if (GV->getName().starts_with("llvm.") || + GV->getName().starts_with("__llvm") || + GV->getName().starts_with("__prof")) + return; + + // VTableProfData already created + auto It = VTableDataMap.find(GV); + if (It != VTableDataMap.end() && It->second) + return; + + GlobalValue::LinkageTypes Linkage = GV->getLinkage(); + GlobalValue::VisibilityTypes Visibility = GV->getVisibility(); + + // This is to keep consistent with per-function profile data + // for correctness. + if (TT.isOSBinFormatXCOFF()) { + Linkage = GlobalValue::InternalLinkage; + Visibility = GlobalValue::DefaultVisibility; } - return GV; + + LLVMContext &Ctx = M->getContext(); + Type *DataTypes[] = { +#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) LLVMType, +#include "llvm/ProfileData/InstrProfData.inc" +#undef INSTR_PROF_VTABLE_DATA + }; + + auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes)); + + // Used by INSTR_PROF_VTABLE_DATA MACRO + Constant *VTableAddr = getVTableAddrForProfData(GV); + const std::string PGOVTableName = getPGOName(*GV); + // Record the length of the vtable. This is needed since vtable pointers + // loaded from C++ objects might be from the middle of a vtable definition. + uint32_t VTableSizeVal = + M->getDataLayout().getTypeAllocSize(GV->getValueType()); + + Constant *DataVals[] = { +#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Init, +#include "llvm/ProfileData/InstrProfData.inc" +#undef INSTR_PROF_VTABLE_DATA + }; + + auto *Data = + new GlobalVariable(*M, DataTy, /*constant=*/false, Linkage, + ConstantStruct::get(DataTy, DataVals), + getInstrProfVTableVarPrefix() + PGOVTableName); + + Data->setVisibility(Visibility); + Data->setSection(getInstrProfSectionName(IPSK_vtab, TT.getObjectFormat())); + Data->setAlignment(Align(8)); + + maybeSetComdat(Data, GV, Data->getName()); + + VTableDataMap[GV] = Data; + + ReferencedVTables.push_back(GV); + + // VTable is used by runtime but not referenced by other + // sections. 
Conservatively mark it linker retained. + UsedVars.push_back(Data); } -GlobalVariable * -InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { +GlobalVariable *InstrProfiling::setupProfileSection(InstrProfInstBase *Inc, + InstrProfSectKind IPSK) { GlobalVariable *NamePtr = Inc->getName(); - auto &PD = ProfileDataMap[NamePtr]; - if (PD.RegionCounters) - return PD.RegionCounters; // Match the linkage and visibility of the name global. Function *Fn = Inc->getParent()->getParent(); @@ -963,8 +1207,9 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { // Use internal rather than private linkage so the counter variable shows up // in the symbol table when using debug info for correlation. - if (DebugInfoCorrelate && TT.isOSBinFormatMachO() && - Linkage == GlobalValue::PrivateLinkage) + if ((DebugInfoCorrelate || + ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) && + TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage) Linkage = GlobalValue::InternalLinkage; // Due to the limitation of binder as of 2021/09/28, the duplicate weak @@ -993,42 +1238,101 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { // nodeduplicate COMDAT which is lowered to a zero-flag section group. This // allows -z start-stop-gc to discard the entire group when the function is // discarded. - bool DataReferencedByCode = profDataReferencedByCode(*M); - bool NeedComdat = needsComdatForCounter(*Fn, *M); bool Renamed; - std::string CntsVarName = - getVarName(Inc, getInstrProfCountersVarPrefix(), Renamed); - std::string DataVarName = - getVarName(Inc, getInstrProfDataVarPrefix(), Renamed); - auto MaybeSetComdat = [&](GlobalVariable *GV) { - bool UseComdat = (NeedComdat || TT.isOSBinFormatELF()); - if (UseComdat) { - StringRef GroupName = TT.isOSBinFormatCOFF() && DataReferencedByCode - ? 
GV->getName() - : CntsVarName; - Comdat *C = M->getOrInsertComdat(GroupName); - if (!NeedComdat) - C->setSelectionKind(Comdat::NoDeduplicate); - GV->setComdat(C); - // COFF doesn't allow the comdat group leader to have private linkage, so - // upgrade private linkage to internal linkage to produce a symbol table - // entry. - if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage()) - GV->setLinkage(GlobalValue::InternalLinkage); - } - }; + GlobalVariable *Ptr; + StringRef VarPrefix; + std::string VarName; + if (IPSK == IPSK_cnts) { + VarPrefix = getInstrProfCountersVarPrefix(); + VarName = getVarName(Inc, VarPrefix, Renamed); + InstrProfCntrInstBase *CntrIncrement = dyn_cast(Inc); + Ptr = createRegionCounters(CntrIncrement, VarName, Linkage); + } else if (IPSK == IPSK_bitmap) { + VarPrefix = getInstrProfBitmapVarPrefix(); + VarName = getVarName(Inc, VarPrefix, Renamed); + InstrProfMCDCBitmapInstBase *BitmapUpdate = + dyn_cast(Inc); + Ptr = createRegionBitmaps(BitmapUpdate, VarName, Linkage); + } else { + llvm_unreachable("Profile Section must be for Counters or Bitmaps"); + } + Ptr->setVisibility(Visibility); + // Put the counters and bitmaps in their own sections so linkers can + // remove unneeded sections. 
+ Ptr->setSection(getInstrProfSectionName(IPSK, TT.getObjectFormat())); + Ptr->setLinkage(Linkage); + maybeSetComdat(Ptr, Fn, VarName); + return Ptr; +} + +GlobalVariable * +InstrProfiling::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc, + StringRef Name, + GlobalValue::LinkageTypes Linkage) { + uint64_t NumBytes = Inc->getNumBitmapBytes()->getZExtValue(); + auto *BitmapTy = ArrayType::get(Type::getInt8Ty(M->getContext()), NumBytes); + auto GV = new GlobalVariable(*M, BitmapTy, false, Linkage, + Constant::getNullValue(BitmapTy), Name); + GV->setAlignment(Align(1)); + return GV; +} + +GlobalVariable * +InstrProfiling::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) { + GlobalVariable *NamePtr = Inc->getName(); + auto &PD = ProfileDataMap[NamePtr]; + if (PD.RegionBitmaps) + return PD.RegionBitmaps; + + // If RegionBitmaps doesn't already exist, create it by first setting up + // the corresponding profile section. + auto *BitmapPtr = setupProfileSection(Inc, IPSK_bitmap); + PD.RegionBitmaps = BitmapPtr; + return PD.RegionBitmaps; +} + +GlobalVariable * +InstrProfiling::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name, + GlobalValue::LinkageTypes Linkage) { uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); - LLVMContext &Ctx = M->getContext(); + auto &Ctx = M->getContext(); + GlobalVariable *GV; + if (isa(Inc)) { + auto *CounterTy = Type::getInt8Ty(Ctx); + auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters); + // TODO: `Constant::getAllOnesValue()` does not yet accept an array type. 
+ std::vector InitialValues(NumCounters, + Constant::getAllOnesValue(CounterTy)); + GV = new GlobalVariable(*M, CounterArrTy, false, Linkage, + ConstantArray::get(CounterArrTy, InitialValues), + Name); + GV->setAlignment(Align(1)); + } else { + auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters); + GV = new GlobalVariable(*M, CounterTy, false, Linkage, + Constant::getNullValue(CounterTy), Name); + GV->setAlignment(Align(8)); + } + return GV; +} - auto *CounterPtr = createRegionCounters(Inc, CntsVarName, Linkage); - CounterPtr->setVisibility(Visibility); - CounterPtr->setSection( - getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat())); - CounterPtr->setLinkage(Linkage); - MaybeSetComdat(CounterPtr); +GlobalVariable * +InstrProfiling::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) { + GlobalVariable *NamePtr = Inc->getName(); + auto &PD = ProfileDataMap[NamePtr]; + if (PD.RegionCounters) + return PD.RegionCounters; + + // If RegionCounters doesn't already exist, create it by first setting up + // the corresponding profile section. + auto *CounterPtr = setupProfileSection(Inc, IPSK_cnts); PD.RegionCounters = CounterPtr; - if (DebugInfoCorrelate) { + + if (DebugInfoCorrelate || + ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) { + LLVMContext &Ctx = M->getContext(); + Function *Fn = Inc->getParent()->getParent(); if (auto *SP = Fn->getSubprogram()) { DIBuilder DB(*M, true, SP->getUnit()); Metadata *FunctionNameAnnotation[] = { @@ -1066,8 +1370,50 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { Ctx.diagnose( DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); } + + // Mark the counter variable as used so that it isn't optimized out. 
+ CompilerUsedVars.push_back(PD.RegionCounters); + } + + return PD.RegionCounters; +} + +void InstrProfiling::createDataVariable(InstrProfCntrInstBase *Inc, + InstrProfMCDCBitmapParameters *Params) { + // When debug information is correlated to profile data, a data variable + // is not needed. + if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) + return; + + GlobalVariable *NamePtr = Inc->getName(); + auto &PD = ProfileDataMap[NamePtr]; + + LLVMContext &Ctx = M->getContext(); + + Function *Fn = Inc->getParent()->getParent(); + GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage(); + GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility(); + + // Due to the limitation of binder as of 2021/09/28, the duplicate weak + // symbols in the same csect won't be discarded. When there are duplicate weak + // symbols, we can NOT guarantee that the relocations get resolved to the + // intended weak symbol, so we can not ensure the correctness of the relative + // CounterPtr, so we have to use private linkage for counter and data symbols. + if (TT.isOSBinFormatXCOFF()) { + Linkage = GlobalValue::PrivateLinkage; + Visibility = GlobalValue::DefaultVisibility; } + bool DataReferencedByCode = profDataReferencedByCode(*M); + bool NeedComdat = needsComdatForCounter(*Fn, *M); + bool Renamed; + + // The Data Variable section is anchored to profile counters. + std::string CntsVarName = + getVarName(Inc, getInstrProfCountersVarPrefix(), Renamed); + std::string DataVarName = + getVarName(Inc, getInstrProfDataVarPrefix(), Renamed); + auto *Int8PtrTy = Type::getInt8PtrTy(Ctx); // Allocate statically the array of pointers to value profile nodes for // the current function. 
@@ -1085,16 +1431,17 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { ValuesVar->setSection( getInstrProfSectionName(IPSK_vals, TT.getObjectFormat())); ValuesVar->setAlignment(Align(8)); - MaybeSetComdat(ValuesVar); + maybeSetComdat(ValuesVar, Fn, CntsVarName); ValuesPtrExpr = ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx)); } - if (DebugInfoCorrelate) { - // Mark the counter variable as used so that it isn't optimized out. - CompilerUsedVars.push_back(PD.RegionCounters); - return PD.RegionCounters; - } + uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); + auto *CounterPtr = PD.RegionCounters; + + uint64_t NumBitmapBytes = 0; + if (Params != nullptr) + NumBitmapBytes = Params->getNumBitmapBytes()->getZExtValue(); // Create data variable. auto *IntPtrTy = M->getDataLayout().getIntPtrType(M->getContext()); @@ -1131,11 +1478,29 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { } auto *Data = new GlobalVariable(*M, DataTy, false, Linkage, nullptr, DataVarName); - // Reference the counter variable with a label difference (link-time - // constant). - auto *RelativeCounterPtr = - ConstantExpr::getSub(ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy), - ConstantExpr::getPtrToInt(Data, IntPtrTy)); + Constant *RelativeCounterPtr; + GlobalVariable *BitmapPtr = PD.RegionBitmaps; + Constant *RelativeBitmapPtr = ConstantInt::get(IntPtrTy, 0); + InstrProfSectKind DataSectionKind; + // With binary profile correlation, profile data is not loaded into memory. + // profile data must reference profile counter with an absolute relocation. + if (ProfileCorrelate == InstrProfCorrelator::BINARY) { + DataSectionKind = IPSK_covdata; + RelativeCounterPtr = ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy); + if (BitmapPtr != nullptr) + RelativeBitmapPtr = ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy); + } else { + // Reference the counter variable with a label difference (link-time + // constant). 
+ DataSectionKind = IPSK_data; + RelativeCounterPtr = + ConstantExpr::getSub(ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy), + ConstantExpr::getPtrToInt(Data, IntPtrTy)); + if (BitmapPtr != nullptr) + RelativeBitmapPtr = + ConstantExpr::getSub(ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy), + ConstantExpr::getPtrToInt(Data, IntPtrTy)); + } Constant *DataVals[] = { #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init, @@ -1144,9 +1509,10 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { Data->setInitializer(ConstantStruct::get(DataTy, DataVals)); Data->setVisibility(Visibility); - Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat())); + Data->setSection( + getInstrProfSectionName(DataSectionKind, TT.getObjectFormat())); Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT)); - MaybeSetComdat(Data); + maybeSetComdat(Data, Fn, CntsVarName); PD.DataVar = Data; @@ -1158,8 +1524,6 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { NamePtr->setLinkage(GlobalValue::PrivateLinkage); // Collect the referenced names to be used by emitNameData. ReferencedNames.push_back(NamePtr); - - return PD.RegionCounters; } void InstrProfiling::emitVNodes() { @@ -1232,7 +1596,9 @@ void InstrProfiling::emitNameData() { getInstrProfNamesVarName()); NamesSize = CompressedNameStr.size(); NamesVar->setSection( - getInstrProfSectionName(IPSK_name, TT.getObjectFormat())); + ProfileCorrelate == InstrProfCorrelator::BINARY + ? getInstrProfSectionName(IPSK_covname, TT.getObjectFormat()) + : getInstrProfSectionName(IPSK_name, TT.getObjectFormat())); // On COFF, it's important to reduce the alignment down to 1 to prevent the // linker from inserting padding before the start of the names section or // between names entries. 
@@ -1245,6 +1611,31 @@ void InstrProfiling::emitNameData() { NamePtr->eraseFromParent(); } +void InstrProfiling::emitVTableNames() { + if (!EnableVTableValueProfiling || ReferencedVTables.empty()) + return; + + // Collect the PGO names of referenced vtables and compress them. + std::string CompressedVTableNames; + if (Error E = collectVTableStrings(ReferencedVTables, CompressedVTableNames, + DoInstrProfNameCompression)) { + report_fatal_error(Twine(toString(std::move(E))), false); + } + + auto &Ctx = M->getContext(); + auto *VTableNamesVal = ConstantDataArray::getString( + Ctx, StringRef(CompressedVTableNames), false /* AddNull */); + GlobalVariable *VTableNamesVar = + new GlobalVariable(*M, VTableNamesVal->getType(), true /* constant */, + GlobalValue::PrivateLinkage, VTableNamesVal, + getInstrProfVTableNamesVarName()); + VTableNamesVar->setSection( + getInstrProfSectionName(IPSK_vname, TT.getObjectFormat())); + VTableNamesVar->setAlignment(Align(1)); + // Make VTableNames linker retained. 
+ UsedVars.push_back(VTableNamesVar); +} + void InstrProfiling::emitRegistration() { if (!needsRuntimeRegistrationOfSectionRange(TT)) return; diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 789ed005d03ddfe86ef4beb5d1ef7b3f287ba5e4..453dd00488b8ffef9656fe51e9132e86ea902abf 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -679,12 +679,26 @@ static void readMemprof(Module &M, Function &F, const TargetLibraryInfo &TLI) { auto &Ctx = M.getContext(); - auto FuncName = getPGOFuncName(F); + auto FuncName = getIRPGOFuncName(F); auto FuncGUID = Function::getGUID(FuncName); - Expected MemProfResult = - MemProfReader->getMemProfRecord(FuncGUID); - if (Error E = MemProfResult.takeError()) { - handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { + std::optional MemProfRec; + auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec); + if (Err) { + // If we don't find getIRPGOFuncName(), try getPGOFuncName() to handle + // profiles built by older compilers + Err = handleErrors(std::move(Err), [&](const InstrProfError &IE) -> Error { + if (IE.get() != instrprof_error::unknown_function) + return make_error(IE); + auto FuncName = getPGOFuncName(F); + auto FuncGUID = Function::getGUID(FuncName); + if (auto Err = + MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec)) + return Err; + return Error::success(); + }); + } + if (Err) { + handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) { auto Err = IPE.get(); bool SkipWarning = false; LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName @@ -722,15 +736,14 @@ static void readMemprof(Module &M, Function &F, // the frame array (see comments below where the map entries are added). 
std::map *, unsigned>>> LocHashToCallSites; - const auto MemProfRec = std::move(MemProfResult.get()); - for (auto &AI : MemProfRec.AllocSites) { + for (auto &AI : MemProfRec->AllocSites) { // Associate the allocation info with the leaf frame. The later matching // code will match any inlined call sequences in the IR with a longer prefix // of call stack frames. uint64_t StackId = computeStackId(AI.CallStack[0]); LocHashToAllocInfo[StackId].insert(&AI); } - for (auto &CS : MemProfRec.CallSites) { + for (auto &CS : MemProfRec->CallSites) { // Need to record all frames from leaf up to and including this function, // as any of these may or may not have been inlined at this point. unsigned Idx = 0; diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index b9459b59e70455eca6a4840082fe1a5eea02e08f..d24b463a0adfbfdad086638a365e2d07220865f3 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -318,6 +318,8 @@ static cl::opt PGOFunctionCriticalEdgeThreshold( cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold.")); +extern cl::opt MaxNumVTableAnnotations; + namespace llvm { // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts @@ -327,7 +329,11 @@ extern cl::opt PGOViewCounts; // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= extern cl::opt ViewBlockFreqFuncName; -extern cl::opt DebugInfoCorrelate; +// Command line option to enable vtable value profiling. 
Defined in +// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling= +extern cl::opt EnableVTableValueProfiling; +extern cl::opt EnableVTableProfileUse; +extern cl::opt ProfileCorrelate; } // namespace llvm static cl::opt @@ -382,7 +388,7 @@ static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { ProfileVersion |= VARIANT_MASK_CSIR_PROF; if (PGOInstrumentEntry) ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; - if (DebugInfoCorrelate) + if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; if (PGOFunctionEntryCoverage) ProfileVersion |= @@ -525,6 +531,7 @@ public: std::vector> ValueSites; SelectInstVisitor SIVisitor; std::string FuncName; + std::string DeprecatedFuncName; GlobalVariable *FuncNameVar; // CFG hash value for this function. @@ -584,13 +591,16 @@ public: NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); NumOfPGOBB += MST.BBInfos.size(); ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); + if (EnableVTableValueProfiling) + ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget); } else { NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); NumOfCSPGOBB += MST.BBInfos.size(); } - FuncName = getPGOFuncName(F); + FuncName = getIRPGOFuncName(F); + DeprecatedFuncName = getPGOFuncName(F); computeCFGHash(); if (!ComdatMembers.empty()) renameComdatFunction(); @@ -1061,7 +1071,7 @@ public: : F(Func), M(Modu), BFI(BFIin), PSI(PSI), FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, InstrumentFuncEntry, HasSingleByteCoverage), - FreqAttr(FFA_Normal), IsCS(IsCS) {} + FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {} void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum); @@ -1143,6 +1153,8 @@ private: // Is to use the context sensitive profile. bool IsCS; + ValueProfileCollector VPC; + // Find the Instrumented BB and set the value. Return false on error. 
bool setInstrumentedCounts(const std::vector &CountFromProfile); @@ -1336,7 +1348,8 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, auto &Ctx = M->getContext(); uint64_t MismatchedFuncSum = 0; Expected Result = PGOReader->getInstrProfRecord( - FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum); + FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName, + &MismatchedFuncSum); if (Error E = Result.takeError()) { handleInstrProfError(std::move(E), MismatchedFuncSum); return false; @@ -1381,7 +1394,8 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) { uint64_t MismatchedFuncSum = 0; Expected Result = PGOReader->getInstrProfRecord( - FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum); + FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName, + &MismatchedFuncSum); if (auto Err = Result.takeError()) { handleInstrProfError(std::move(Err), MismatchedFuncSum); return; @@ -1697,6 +1711,14 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) { llvm_unreachable("Unknown visiting mode"); } +static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind) { + if (ValueProfKind == IPVK_MemOPSize) + return MaxNumMemOPAnnotations; + if (ValueProfKind == llvm::IPVK_VTableTarget) + return MaxNumVTableAnnotations; + return MaxNumAnnotations; +} + // Traverse all valuesites and annotate the instructions for all value kind. 
void PGOUseFunc::annotateValueSites() { if (DisableValueProfiling) @@ -1713,8 +1735,23 @@ void PGOUseFunc::annotateValueSites() { void PGOUseFunc::annotateValueSites(uint32_t Kind) { assert(Kind <= IPVK_Last); unsigned ValueSiteIndex = 0; - auto &ValueSites = FuncInfo.ValueSites[Kind]; + unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); + + // Since there isn't a reliable or fast way for profile reader to tell if a + // profile is generated with `-enable-vtable-value-profiling` on, we run the + // value profile collector over the function IR to find the instrumented sites + // iff function profile records shows the number of instrumented vtable sites + // is not zero. Function cfg already takes the number of instrumented + // indirect call sites into account so it doesn't hash the number of + // instrumented vtables; as a side effect it makes it easier to enable + // profiling and profile use in two steps if needed. + // TODO: Remove this if/when -enable-vtable-value-profiling is on by default. + if (NumValueSites > 0 && Kind == IPVK_VTableTarget && + NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() && + MaxNumVTableAnnotations != 0) + FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget); + auto &ValueSites = FuncInfo.ValueSites[Kind]; if (NumValueSites != ValueSites.size()) { auto &Ctx = M->getContext(); Ctx.diagnose(DiagnosticInfoPGOProfile( @@ -1731,10 +1768,10 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) { LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind << "): Index = " << ValueSiteIndex << " out of " << NumValueSites << "\n"); - annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, - static_cast(Kind), ValueSiteIndex, - Kind == IPVK_MemOPSize ? 
MaxNumMemOPAnnotations - : MaxNumAnnotations); + annotateValueSite( + *M, *I.AnnotatedInst, ProfileRecord, + static_cast(Kind), ValueSiteIndex, + getMaxNumAnnotations(static_cast(Kind))); ValueSiteIndex++; } } @@ -1797,6 +1834,15 @@ static bool InstrumentAllFunctions( // (before LTO/ThinLTO linking) to create these variables. if (!IsCS) createIRLevelProfileFlagVar(M, /*IsCS=*/false); + + Triple TT(M.getTargetTriple()); + LLVMContext &Ctx = M.getContext(); + if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling) + Ctx.diagnose(DiagnosticInfoPGOProfile( + M.getName().data(), + Twine("VTable value profiling is presently not " + "supported for non-ELF object formats"), + DS_Warning)); std::unordered_multimap ComdatMembers; collectComdatMembers(M, ComdatMembers); @@ -2008,6 +2054,16 @@ static bool annotateAllFunctions( return false; } + if (EnableVTableProfileUse) { + for (GlobalVariable &G : M.globals()) { + if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type)) + continue; + + // Create the PGOFuncName meta data. + createPGONameMetadata(G, getPGOName(G, false /* InLTO*/)); + } + } + // Add the profile summary (read from the header of the indexed summary) here // so that we can use it below when reading counters (which checks if the // function should be marked with a cold or inlinehint attribute). 
diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc index 3a129de1acd02d6ba0540d63780e86581fbbcd6b..b47ef8523ea1123a1674229904733e165e2d55be 100644 --- a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc +++ b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc @@ -90,9 +90,38 @@ public: } }; +///--------------------- VirtualTableValueProfilingPlugin -------------------- +class VTableProfilingPlugin { + Function &F; + +public: + static constexpr InstrProfValueKind Kind = IPVK_VTableTarget; + + VTableProfilingPlugin(Function &Fn, TargetLibraryInfo &TLI) : F(Fn) {} + + void run(std::vector &Candidates) { + std::vector Result = findVTableAddrs(F); + for (Instruction *I : Result) { + Instruction *InsertPt = I->getNextNonDebugInstruction(); + // When finding an insertion point, keep PHI and EH pad instructions + // before vp intrinsics. This is similar to + // `BasicBlock::getFirstInsertionPt`. + while (InsertPt && (dyn_cast(InsertPt) || InsertPt->isEHPad())) + InsertPt = InsertPt->getNextNonDebugInstruction(); + // Skip instrumenting the value if no insertion point was found (InsertPt is null). + // FIXME: Set InsertPt to the end of basic block to instrument the value + // if InsertPt is the last instruction. + if (InsertPt == nullptr) + continue; + + Instruction *AnnotatedInst = I; + Candidates.emplace_back(CandidateInfo{I, InsertPt, AnnotatedInst}); + } + } +}; + ///----------------------- Registration of the plugins ------------------------- /// For now, registering a plugin with the ValueProfileCollector is done by /// adding the plugin type to the VP_PLUGIN_LIST macro.
-#define VP_PLUGIN_LIST \ - MemIntrinsicPlugin, \ - IndirectCallPromotionPlugin +#define VP_PLUGIN_LIST \ + MemIntrinsicPlugin, IndirectCallPromotionPlugin, VTableProfilingPlugin diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp index b488e3bb0cbd1ec20d21426ade4ff026352c1b40..8333b08b7076dcb28d4736b869471fa17e968fed 100644 --- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -12,9 +12,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/CallPromotionUtils.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/IR/AttributeMask.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -188,10 +190,9 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) { /// Predicate and clone the given call site. /// /// This function creates an if-then-else structure at the location of the call -/// site. The "if" condition compares the call site's called value to the given -/// callee. The original call site is moved into the "else" block, and a clone -/// of the call site is placed in the "then" block. The cloned instruction is -/// returned. +/// site. The "if" condition is specified by `Cond`. +/// The original call site is moved into the "else" block, and a clone of the +/// call site is placed in the "then" block. The cloned instruction is returned. 
/// /// For example, the call instruction below: /// @@ -202,7 +203,7 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) { /// Is replace by the following: /// /// orig_bb: -/// %cond = icmp eq i32 ()* %ptr, @func +/// %cond = Cond /// br i1 %cond, %then_bb, %else_bb /// /// then_bb: @@ -232,7 +233,7 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) { /// Is replace by the following: /// /// orig_bb: -/// %cond = icmp eq i32 ()* %ptr, @func +/// %cond = Cond /// br i1 %cond, %then_bb, %else_bb /// /// then_bb: @@ -267,7 +268,7 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) { /// Is replaced by the following: /// /// cond_bb: -/// %cond = icmp eq i32 ()* %ptr, @func +/// %cond = Cond /// br i1 %cond, %then_bb, %orig_bb /// /// then_bb: @@ -280,19 +281,13 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) { /// ; The original call instruction stays in its original block. /// %t0 = musttail call i32 %ptr() /// ret %t0 -CallBase &llvm::versionCallSite(CallBase &CB, Value *Callee, - MDNode *BranchWeights) { +static CallBase &versionCallSiteWithCond(CallBase &CB, Value *Cond, + MDNode *BranchWeights) { IRBuilder<> Builder(&CB); CallBase *OrigInst = &CB; BasicBlock *OrigBlock = OrigInst->getParent(); - // Create the compare. The called value and callee must have the same type to - // be compared. - if (CB.getCalledOperand()->getType() != Callee->getType()) - Callee = Builder.CreateBitCast(Callee, CB.getCalledOperand()->getType()); - auto *Cond = Builder.CreateICmpEQ(CB.getCalledOperand(), Callee); - if (OrigInst->isMustTailCall()) { // Create an if-then structure. The original instruction stays in its block, // and a clone of the original instruction is placed in the "then" block. 
@@ -380,6 +375,22 @@ CallBase &llvm::versionCallSite(CallBase &CB, Value *Callee, return *NewInst; } +// Predicate and clone the given call site using condition `CB.callee == +// Callee`. See the comment `versionCallSiteWithCond` for the transformation. +CallBase &llvm::versionCallSite(CallBase &CB, Value *Callee, + MDNode *BranchWeights) { + + IRBuilder<> Builder(&CB); + + // Create the compare. The called value and callee must have the same type to + // be compared. + if (CB.getCalledOperand()->getType() != Callee->getType()) + Callee = Builder.CreateBitCast(Callee, CB.getCalledOperand()->getType()); + auto *Cond = Builder.CreateICmpEQ(CB.getCalledOperand(), Callee); + + return versionCallSiteWithCond(CB, Cond, BranchWeights); +} + bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason) { assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted"); @@ -559,6 +570,27 @@ CallBase &llvm::promoteCallWithIfThenElse(CallBase &CB, Function *Callee, return promoteCall(NewInst, Callee); } +CallBase &llvm::promoteCallWithVTableCmp(CallBase &CB, Instruction *VPtr, + Function *Callee, + ArrayRef AddressPoints, + MDNode *BranchWeights) { + assert(!AddressPoints.empty() && "Caller should guarantee"); + IRBuilder<> Builder(&CB); + SmallVector ICmps; + for (auto &AddressPoint : AddressPoints) + ICmps.push_back(Builder.CreateICmpEQ(VPtr, AddressPoint)); + + // TODO: Perform tree height reduction if the number of ICmps is high. + Value *Cond = Builder.CreateOr(ICmps); + + // Version the indirect call site. If Cond is true, 'NewInst' will be + // executed, otherwise the original call site will be executed. + CallBase &NewInst = versionCallSiteWithCond(CB, Cond, BranchWeights); + + // Promote 'NewInst' so that it directly calls the desired function. 
+ return promoteCall(NewInst, Callee); +} + bool llvm::tryPromoteCall(CallBase &CB) { assert(!CB.getCalledFunction()); Module *M = CB.getCaller()->getParent(); @@ -597,16 +629,13 @@ bool llvm::tryPromoteCall(CallBase &CB) { // Not in the form of a global constant variable with an initializer. return false; - Constant *VTableGVInitializer = GV->getInitializer(); APInt VTableGVOffset = VTableOffsetGVBase + VTableOffset; if (!(VTableGVOffset.getActiveBits() <= 64)) return false; // Out of range. - Constant *Ptr = getPointerAtOffset(VTableGVInitializer, - VTableGVOffset.getZExtValue(), - *M); - if (!Ptr) - return false; // No constant (function) pointer found. - Function *DirectCallee = dyn_cast(Ptr->stripPointerCasts()); + + Function *DirectCallee = nullptr; + std::tie(DirectCallee, std::ignore) = + getFunctionAtVTableOffset(GV, VTableGVOffset.getZExtValue(), *M); if (!DirectCallee) return false; // No function pointer found. diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index f7b93fc8fd060965fb7bd9ae4ca19f520fbb7dab..a098ea5322d929babf1ad28d964c864fc088e2de 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/IndirectCallVisitor.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/ObjCARCAnalysisUtils.h" @@ -54,6 +55,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -1807,13 +1809,29 @@ void llvm::updateProfileCallee( ? 
0 : PriorEntryCount + EntryDelta; + auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount, + const uint64_t PriorEntryCount) { + Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB); + if (VPtr) + scaleProfData(*VPtr, NewEntryCount, PriorEntryCount); + }; + // During inlining ? if (VMap) { uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount; - for (auto Entry : *VMap) + for (auto Entry : *VMap) { if (isa(Entry.first)) - if (auto *CI = dyn_cast_or_null(Entry.second)) + if (auto *CI = dyn_cast_or_null(Entry.second)) { CI->updateProfWeight(CloneEntryCount, PriorEntryCount); + updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount); + } + + if (isa(Entry.first)) + if (auto *II = dyn_cast_or_null(Entry.second)) { + II->updateProfWeight(CloneEntryCount, PriorEntryCount); + updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount); + } + } } if (EntryDelta) { @@ -1822,9 +1840,16 @@ void llvm::updateProfileCallee( for (BasicBlock &BB : *Callee) // No need to update the callsite if it is pruned during inlining. if (!VMap || VMap->count(&BB)) - for (Instruction &I : BB) - if (CallInst *CI = dyn_cast(&I)) + for (Instruction &I : BB) { + if (CallInst *CI = dyn_cast(&I)) { CI->updateProfWeight(NewEntryCount, PriorEntryCount); + updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount); + } + if (InvokeInst *II = dyn_cast(&I)) { + II->updateProfWeight(NewEntryCount, PriorEntryCount); + updateVTableProfWeight(II, NewEntryCount, PriorEntryCount); + } + } } } diff --git a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll new file mode 100644 index 0000000000000000000000000000000000000000..ba3ce9a75ee83213804ae2eb2be3e79d96269d24 --- /dev/null +++ b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll @@ -0,0 +1,74 @@ +; Promote at most one function and annotate at most one vtable. 
+; As a result, only one value (of each relevant kind) shows up in the function +; summary. + +; RUN: opt -module-summary -icp-max-num-vtables=1 -icp-max-prom=1 %s -o %t.o + +; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s + +; RUN: llvm-dis -o - %t.o | FileCheck %s --check-prefix=DIS +; Round trip it through llvm-as +; RUN: llvm-dis -o - %t.o | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=DIS + +; CHECK: +; CHECK-NEXT: +; The `VALUE_GUID` below represents the "_ZTV4Base" referenced by the instruction +; that loads vtable pointers. +; CHECK-NEXT: +; The `VALUE_GUID` below represents the "_ZN4Base4funcEv" referenced by the +; indirect call instruction. +; CHECK-NEXT: +; NOTE: vtables and functions from the Derived class are dropped because +; `-icp-max-num-vtables` and `-icp-max-prom` are both set to one. +; has the format [valueid, flags, instcount, funcflags, +; numrefs, rorefcnt, worefcnt, +; m x valueid, +; n x (valueid, hotness+tailcall)] +; CHECK-NEXT: +; CHECK-NEXT: + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function has one BB and an entry count of 150, so the BB is hot according to +; ProfileSummary and reflected so in the bitcode (see llvm-dis output).
+define i32 @_Z4testP4Base(ptr %0) !prof !15 { + %2 = load ptr, ptr %0, !prof !16 + %3 = load ptr, ptr %2 + %4 = tail call i32 %3(ptr %0), !prof !17 + ret i32 %4 +} + +!llvm.module.flags = !{!1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 200} +!6 = !{!"MaxInternalCount", i64 200} +!7 = !{!"MaxFunctionCount", i64 200} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 990000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} + +!15 = !{!"function_entry_count", i32 150} +; 1960855528937986108 is the MD5 hash of _ZTV4Base, and +; 13870436605473471591 is the MD5 hash of _ZTV7Derived +!16 = !{!"VP", i32 2, i64 150, i64 1960855528937986108, i64 100, i64 13870436605473471591, i64 50} +; 5459407273543877811 is the MD5 hash of _ZN4Base4funcEv, and +; 6174874150489409711 is the MD5 hash of _ZN7Derived4funcEv +!17 = !{!"VP", i32 0, i64 150, i64 5459407273543877811, i64 100, i64 6174874150489409711, i64 50} + +; ModuleSummaryIndex stores the guid-to-summary map in std::map; so +; global value summaries are printed out in the order that gv's guid increases.
+; DIS: ^0 = module: (path: "{{.*}}", hash: (0, 0, 0, 0, 0)) +; DIS: ^1 = gv: (guid: 1960855528937986108) +; DIS: ^2 = gv: (guid: 5459407273543877811) +; DIS: ^3 = gv: (name: "_Z4testP4Base", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), calls: ((callee: ^2, hotness: hot)), refs: (readonly ^1)))) ; guid = 15857150948103218965 +; DIS: ^4 = blockcount: 0 diff --git a/llvm/test/Instrumentation/InstrProfiling/coverage.ll b/llvm/test/Instrumentation/InstrProfiling/coverage.ll index 1401d8f620b3f44d1841d41601e121d89bff2a0e..08cbcaa962b7652144946676229331501c0d76d2 100644 --- a/llvm/test/Instrumentation/InstrProfiling/coverage.ll +++ b/llvm/test/Instrumentation/InstrProfiling/coverage.ll @@ -1,11 +1,19 @@ ; RUN: opt < %s -passes=instrprof -S | FileCheck %s +; RUN: opt < %s -passes=instrprof -profile-correlate=binary -S | FileCheck %s --check-prefix=BINARY target triple = "aarch64-unknown-linux-gnu" @__profn_foo = private constant [3 x i8] c"foo" ; CHECK: @__profc_foo = private global [1 x i8] c"\FF", section "__llvm_prf_cnts", comdat, align 1 +; CHECK: @__profd_foo = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 sub (i64 ptrtoint (ptr @__profc_foo to i64) +; BINARY: @__profd_foo = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 ptrtoint (ptr @__profc_foo to i64), @__profn_bar = private constant [3 x i8] c"bar" ; CHECK: @__profc_bar = private global [1 x i8] c"\FF", section "__llvm_prf_cnts", comdat, align 1 +; CHECK: @__profd_bar = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 sub (i64 ptrtoint (ptr @__profc_bar to i64) +; BINARY: 
@__profd_bar = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 ptrtoint (ptr @__profc_bar to i64), + +; CHECK: @__llvm_prf_nm = {{.*}} section "__llvm_prf_names" +; BINARY: @__llvm_prf_nm ={{.*}} section "__llvm_covnames" define void @_Z3foov() { call void @llvm.instrprof.cover(ptr @__profn_foo, i64 12345678, i32 1, i32 0) diff --git a/llvm/test/Instrumentation/InstrProfiling/mcdc.ll b/llvm/test/Instrumentation/InstrProfiling/mcdc.ll new file mode 100644 index 0000000000000000000000000000000000000000..fccb026c25bf26c572f7bd336593bfd913b4627d --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/mcdc.ll @@ -0,0 +1,53 @@ +; Check that MC/DC intrinsics are properly lowered +; RUN: opt < %s -passes=instrprof -S | FileCheck %s +; RUN: opt < %s -passes=instrprof -runtime-counter-relocation -S 2>&1 | FileCheck %s --check-prefix RELOC + +; RELOC: Runtime counter relocation is presently not supported for MC/DC bitmaps + +target triple = "x86_64-unknown-linux-gnu" + +@__profn_test = private constant [4 x i8] c"test" + +; CHECK: @__profbm_test = private global [1 x i8] zeroinitializer, section "__llvm_prf_bits", comdat, align 1 + +define dso_local void @test(i32 noundef %A) { +entry: + %A.addr = alloca i32, align 4 + %mcdc.addr = alloca i32, align 4 + call void @llvm.instrprof.cover(ptr @__profn_test, i64 99278, i32 5, i32 0) + ; CHECK: store i8 0, ptr @__profc_test, align 1 + + call void @llvm.instrprof.mcdc.parameters(ptr @__profn_test, i64 99278, i32 1) + store i32 0, ptr %mcdc.addr, align 4 + %0 = load i32, ptr %A.addr, align 4 + %tobool = icmp ne i32 %0, 0 + + call void @llvm.instrprof.mcdc.condbitmap.update(ptr @__profn_test, i64 99278, i32 0, ptr %mcdc.addr, i1 %tobool) + ; CHECK: %mcdc.temp = load i32, ptr %mcdc.addr, align 4 + ; CHECK-NEXT: %1 = zext i1 %tobool to i32 + ; CHECK-NEXT: %2 = shl i32 %1, 0 + ; CHECK-NEXT: %3 = or i32 %mcdc.temp, %2 + ; CHECK-NEXT: store i32 %3, ptr %mcdc.addr, align 4 + + call 
void @llvm.instrprof.mcdc.tvbitmap.update(ptr @__profn_test, i64 99278, i32 1, i32 0, ptr %mcdc.addr) + ; CHECK: %mcdc.temp1 = load i32, ptr %mcdc.addr, align 4 + ; CHECK-NEXT: %4 = lshr i32 %mcdc.temp1, 3 + ; CHECK-NEXT: %5 = zext i32 %4 to i64 + ; CHECK-NEXT: %6 = add i64 ptrtoint (ptr @__profbm_test to i64), %5 + ; CHECK-NEXT: %7 = inttoptr i64 %6 to ptr + ; CHECK-NEXT: %8 = and i32 %mcdc.temp1, 7 + ; CHECK-NEXT: %9 = trunc i32 %8 to i8 + ; CHECK-NEXT: %10 = shl i8 1, %9 + ; CHECK-NEXT: %mcdc.bits = load i8, ptr %7, align 1 + ; CHECK-NEXT: %11 = or i8 %mcdc.bits, %10 + ; CHECK-NEXT: store i8 %11, ptr %7, align 1 + ret void +} + +declare void @llvm.instrprof.cover(ptr, i64, i32, i32) + +declare void @llvm.instrprof.mcdc.parameters(ptr, i64, i32) + +declare void @llvm.instrprof.mcdc.condbitmap.update(ptr, i64, i32, ptr, i1) + +declare void @llvm.instrprof.mcdc.tvbitmap.update(ptr, i64, i32, i32, ptr) diff --git a/llvm/test/Transforms/Inline/update_invoke_prof.ll b/llvm/test/Transforms/Inline/update_invoke_prof.ll new file mode 100644 index 0000000000000000000000000000000000000000..12eb7dbf418c58bae3bfd78bffa1122f52e1d50a --- /dev/null +++ b/llvm/test/Transforms/Inline/update_invoke_prof.ll @@ -0,0 +1,100 @@ +; Tests that instructions with value profiles and count-type branch weights are +; updated in both caller and callee after inline, but invoke instructions with +; taken or not taken branch probabilities are not updated. + +; RUN: opt < %s -passes='require,cgscc(inline)' -S | FileCheck %s + +declare i32 @__gxx_personality_v0(...) 
+ +define void @caller(ptr %func) personality ptr @__gxx_personality_v0 !prof !15 { + call void @callee(ptr %func), !prof !16 + + ret void +} + +declare void @callee1(ptr %func) + +declare void @callee2(ptr %func) + +define void @callee(ptr %obj) personality ptr @__gxx_personality_v0 !prof !17 { + %vtable = load ptr, ptr %obj, !prof !21 + %func = load ptr, ptr %vtable + invoke void %func() + to label %next unwind label %lpad, !prof !18 + +next: + invoke void @callee1(ptr %func) + to label %cont unwind label %lpad, !prof !19 + +cont: + invoke void @callee2(ptr %func) + to label %ret unwind label %lpad, !prof !20 + +lpad: + %exn = landingpad {ptr, i32} + cleanup + unreachable + +ret: + ret void +} + +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"SampleProfile"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 10} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 2000} +!8 = !{!"NumCounts", i64 2} +!9 = !{!"NumFunctions", i64 2} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} +!15 = !{!"function_entry_count", i64 1000} +!16 = !{!"branch_weights", i64 1000} +!17 = !{!"function_entry_count", i32 1500} +!18 = !{!"VP", i32 0, i64 1500, i64 123, i64 900, i64 456, i64 600} +!19 = !{!"branch_weights", i32 1500} +!20 = !{!"branch_weights", i32 1234, i32 5678} +!21 = !{!"VP", i32 2, i64 1500, i64 789, i64 900, i64 321, i64 600} + +; CHECK-LABEL: define void @caller( +; CHECK-SAME: ptr [[FUNC:%.*]]) personality ptr @__gxx_personality_v0 !prof [[PROF14:![0-9]+]] { +; CHECK-NEXT: [[VTABLE_I:%.*]] = load ptr, ptr [[FUNC]], align 8, !prof [[PROF15:![0-9]+]] +; CHECK-NEXT: [[FUNC_I:%.*]] = load ptr, ptr [[VTABLE_I]], align 8 +; CHECK-NEXT: invoke void [[FUNC_I]]() +; CHECK-NEXT: to label %[[NEXT_I:.*]] unwind label %[[LPAD_I:.*]], !prof 
[[PROF16:![0-9]+]] +; CHECK: [[NEXT_I]]: +; CHECK-NEXT: invoke void @callee1(ptr [[FUNC_I]]) +; CHECK-NEXT: to label %[[CONT_I:.*]] unwind label %[[LPAD_I]], !prof [[PROF17:![0-9]+]] +; CHECK: [[CONT_I]]: +; CHECK-NEXT: invoke void @callee2(ptr [[FUNC_I]]) +; CHECK-NEXT: to label %[[CALLEE_EXIT:.*]] unwind label %[[LPAD_I]], !prof [[PROF18:![0-9]+]] +; + +; CHECK-LABEL: define void @callee( +; CHECK-SAME: ptr [[OBJ:%.*]]) personality ptr @__gxx_personality_v0 !prof [[PROF19:![0-9]+]] { +; CHECK-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[OBJ]], align 8, !prof [[PROF20:![0-9]+]] +; CHECK-NEXT: [[FUNC:%.*]] = load ptr, ptr [[VTABLE]], align 8 +; CHECK-NEXT: invoke void [[FUNC]]() +; CHECK-NEXT: to label %[[NEXT:.*]] unwind label %[[LPAD:.*]], !prof [[PROF21:![0-9]+]] +; CHECK: [[NEXT]]: +; CHECK-NEXT: invoke void @callee1(ptr [[FUNC]]) +; CHECK-NEXT: to label %[[CONT:.*]] unwind label %[[LPAD]], !prof [[PROF22:![0-9]+]] +; CHECK: [[CONT]]: +; CHECK-NEXT: invoke void @callee2(ptr [[FUNC]]) +; CHECK-NEXT: to label %[[RET:.*]] unwind label %[[LPAD]], !prof [[PROF18]] + +; CHECK: [[PROF14]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF15]] = !{!"VP", i32 2, i64 1000, i64 789, i64 600, i64 321, i64 400} +; CHECK: [[PROF16]] = !{!"VP", i32 0, i64 1000, i64 123, i64 600, i64 456, i64 400} +; CHECK: [[PROF17]] = !{!"branch_weights", i32 1000} +; CHECK: [[PROF18]] = !{!"branch_weights", i32 1234, i32 5678} +; CHECK: [[PROF19]] = !{!"function_entry_count", i64 500} +; CHECK: [[PROF20]] = !{!"VP", i32 2, i64 500, i64 789, i64 300, i64 321, i64 200} +; CHECK: [[PROF21]] = !{!"VP", i32 0, i64 500, i64 123, i64 300, i64 456, i64 200} +; CHECK: [[PROF22]] = !{!"branch_weights", i32 500} diff --git a/llvm/test/Transforms/Inline/update_value_profile.ll b/llvm/test/Transforms/Inline/update_value_profile.ll new file mode 100644 index 0000000000000000000000000000000000000000..5b458781f08b16af4776ac80121e3853dafad048 --- /dev/null +++ 
b/llvm/test/Transforms/Inline/update_value_profile.ll @@ -0,0 +1,85 @@ +; RUN: opt < %s -passes='require,cgscc(inline)' -inline-threshold=100 -S | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; When 'callee' is inlined into caller1 and caller2, the indirect call and vtable +; value profiles of the inlined copy should be scaled based on callers' profiles. +; The indirect call and vtable value profiles in 'callee' should be updated. +define i32 @callee(ptr %0, i32 %1) !prof !19 { +; CHECK-LABEL: define i32 @callee( +; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) !prof [[PROF0:![0-9]+]] { +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8, !prof [[PROF1:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP0]], i32 [[TMP1]]), !prof [[PROF2:![0-9]+]] +; CHECK-NEXT: ret i32 [[TMP6]] +; + %3 = load ptr, ptr %0, !prof !15 + %4 = getelementptr inbounds i8, ptr %3, i64 8 + %5 = load ptr, ptr %4 + %6 = tail call i32 %5(ptr %0, i32 %1), !prof !16 + ret i32 %6 +} + +define i32 @caller1(i32 %0) !prof !17 { +; CHECK-LABEL: define i32 @caller1( +; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF3:![0-9]+]] { +; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !prof [[PROF4:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF5:![0-9]+]] +; CHECK-NEXT: ret i32 [[TMP6]] +; + %2 = tail call ptr @_Z10createTypei(i32 %0) + %3 = tail call i32 @callee(ptr %2, i32 %0) + ret i32 %3 +} + +define i32 @caller2(i32 %0) !prof !18 { +; 
CHECK-LABEL: define i32 @caller2( +; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF6:![0-9]+]] { +; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !prof [[PROF7:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF8:![0-9]+]] +; CHECK-NEXT: ret i32 [[TMP6]] +; + %2 = tail call ptr @_Z10createTypei(i32 %0) + %3 = tail call i32 @callee(ptr %2, i32 %0) + ret i32 %3 +} + +declare ptr @_Z10createTypei(i32) + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 10} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} +!15 = !{!"VP", i32 2, i64 1600, i64 321, i64 1000, i64 789, i64 600} +!16 = !{!"VP", i32 0, i64 1600, i64 123, i64 1000, i64 456, i64 600} +!17 = !{!"function_entry_count", i64 1000} +!18 = !{!"function_entry_count", i64 600} +!19 = !{!"function_entry_count", i64 1700} +;. 
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 100} +; CHECK: [[PROF1]] = !{!"VP", i32 2, i64 94, i64 321, i64 58, i64 789, i64 35} +; CHECK: [[PROF2]] = !{!"VP", i32 0, i64 94, i64 123, i64 58, i64 456, i64 35} +; CHECK: [[PROF3]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF4]] = !{!"VP", i32 2, i64 941, i64 321, i64 588, i64 789, i64 352} +; CHECK: [[PROF5]] = !{!"VP", i32 0, i64 941, i64 123, i64 588, i64 456, i64 352} +; CHECK: [[PROF6]] = !{!"function_entry_count", i64 600} +; CHECK: [[PROF7]] = !{!"VP", i32 2, i64 564, i64 321, i64 352, i64 789, i64 211} +; CHECK: [[PROF8]] = !{!"VP", i32 0, i64 564, i64 123, i64 352, i64 456, i64 211} +;. diff --git a/llvm/test/Transforms/PGOProfile/Inputs/coverage.proftext b/llvm/test/Transforms/PGOProfile/Inputs/coverage.proftext new file mode 100644 index 0000000000000000000000000000000000000000..229530ba414065d87642c536c29df3d24d28f03b --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/Inputs/coverage.proftext @@ -0,0 +1,54 @@ +:ir +:single_byte_coverage + +foo +# Func Hash: +848064302753700500 +# Num Counters: +2 +# Counter Values: +3 +4 + + +bar +# Func Hash: +848064302952419074 +# Num Counters: +2 +# Counter Values: +2 +0 + + +goo +# Func Hash: +1106497858086895615 +# Num Counters: +1 +# Counter Values: +5 + + +loop +# Func Hash: +92940490389974880 +# Num Counters: +2 +# Counter Values: +1 +1 + + +hoo +# Func Hash: +1073332642652768409 +# Num Counters: +6 +# Counter Values: +1 +0 +1 +1 +0 +0 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.profraw b/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.profraw new file mode 100644 index 0000000000000000000000000000000000000000..3daa98f937b691880ffff203c9426bfacddf749d Binary files /dev/null and b/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.profraw differ diff --git a/llvm/test/Transforms/PGOProfile/comdat_internal.ll b/llvm/test/Transforms/PGOProfile/comdat_internal.ll 
index a1cf115f910ad75195f039a0bf328ed3a728df2d..1bad0db1b476249f931b11ddac821e68e36cde78 100644 --- a/llvm/test/Transforms/PGOProfile/comdat_internal.ll +++ b/llvm/test/Transforms/PGOProfile/comdat_internal.ll @@ -13,9 +13,9 @@ $foo = comdat any ; CHECK: @__llvm_profile_raw_version = hidden constant i64 {{[0-9]+}}, comdat ; CHECK-NOT: __profn__stdin__foo ; CHECK: @__profc__stdin__foo.[[#FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8 -; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64, ptr, ptr, i32, [2 x i16] } { i64 -5640069336071256030, i64 [[#FOO_HASH]], i64 sub (i64 ptrtoint (ptr @__profc__stdin__foo.742261418966908927 to i64), i64 ptrtoint (ptr @__profd__stdin__foo.742261418966908927 to i64)), ptr null +; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 [[#FOO_HASH]], i64 sub (i64 ptrtoint (ptr @__profc__stdin__foo.742261418966908927 to i64), i64 ptrtoint (ptr @__profd__stdin__foo.742261418966908927 to i64)), i64 0, ptr null ; CHECK-NOT: @foo -; CHECK-SAME: , ptr null, i32 1, [2 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profc__stdin__foo.[[#FOO_HASH]]), align 8 +; CHECK-SAME: , ptr null, i32 1, [3 x i16] zeroinitializer, i32 0 }, section "__llvm_prf_data", comdat($__profc__stdin__foo.[[#FOO_HASH]]), align 8 ; CHECK: @__llvm_prf_nm ; CHECK: @llvm.compiler.used diff --git a/llvm/test/Transforms/PGOProfile/coverage.ll b/llvm/test/Transforms/PGOProfile/coverage.ll index d636be14a2cc99ec38144f0d33253f0c777f66eb..1b7658a2ea9ed0726519d1eb50af16484445f6d7 100644 --- a/llvm/test/Transforms/PGOProfile/coverage.ll +++ b/llvm/test/Transforms/PGOProfile/coverage.ll @@ -1,14 +1,20 @@ -; RUN: opt < %s -passes=pgo-instr-gen -pgo-function-entry-coverage -S | FileCheck %s --implicit-check-not="instrprof.cover" --check-prefixes=CHECK,ENTRY -; RUN: opt < %s -passes=pgo-instr-gen -pgo-block-coverage -S | 
FileCheck %s --implicit-check-not="instrprof.cover" --check-prefixes=CHECK,BLOCK +; RUN: opt < %s -passes=pgo-instr-gen -pgo-function-entry-coverage -S | FileCheck %s --implicit-check-not="instrprof.cover" --check-prefixes=CHECK,GEN,ENTRY +; RUN: opt < %s -passes=pgo-instr-gen -pgo-block-coverage -S | FileCheck %s --implicit-check-not="instrprof.cover" --check-prefixes=CHECK,GEN,BLOCK + +; RUN: llvm-profdata merge %S/Inputs/coverage.proftext -o %t.profdata +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefixes=CHECK,USE target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +; CHECK-LABEL: @foo() +; USE-SAME: !prof ![[HOT:[0-9]+]] define void @foo() { ; CHECK-LABEL: entry: entry: ; ENTRY: call void @llvm.instrprof.cover({{.*}}) %c = call i1 @choice() br i1 %c, label %if.then, label %if.else + ; USE: br i1 %c, label %if.then, label %if.else, !prof ![[WEIGHTS0:[0-9]+]] ; CHECK-LABEL: if.then: if.then: @@ -25,12 +31,15 @@ if.end: ret void } +; CHECK-LABEL: @bar() +; USE-SAME: !prof ![[HOT:[0-9]+]] define void @bar() { ; CHECK-LABEL: entry: entry: ; ENTRY: call void @llvm.instrprof.cover({{.*}}) %c = call i1 @choice() br i1 %c, label %if.then, label %if.end + ; USE: br i1 %c, label %if.then, label %if.end, !prof ![[WEIGHTS1:[0-9]+]] ; CHECK-LABEL: if.then: if.then: @@ -43,24 +52,29 @@ if.end: ret void } +; CHECK-LABEL: @goo() +; USE-SAME: !prof ![[HOT:[0-9]+]] define void @goo() { ; CHECK-LABEL: entry: entry: - ; CHECK: call void @llvm.instrprof.cover({{.*}}) + ; GEN: call void @llvm.instrprof.cover({{.*}}) ret void } +; CHECK-LABEL: @loop() +; USE-SAME: !prof ![[HOT:[0-9]+]] define void @loop() { ; CHECK-LABEL: entry: entry: - ; CHECK: call void @llvm.instrprof.cover({{.*}}) + ; GEN: call void @llvm.instrprof.cover({{.*}}) br label %while while: ; BLOCK: call void @llvm.instrprof.cover({{.*}}) br label %while } -; Function Attrs: 
noinline nounwind ssp uwtable +; CHECK-LABEL: @hoo( +; USE-SAME: !prof ![[HOT:[0-9]+]] define void @hoo(i32 %a) #0 { ; CHECK-LABEL: entry: entry: @@ -72,6 +86,7 @@ entry: %rem = srem i32 %0, 2 %cmp = icmp eq i32 %rem, 0 br i1 %cmp, label %if.then, label %if.else + ; USE: br i1 %cmp, label %if.then, label %if.else, !prof ![[WEIGHTS1]] ; CHECK-LABEL: if.then: if.then: ; preds = %entry @@ -94,6 +109,7 @@ for.cond: ; preds = %for.inc, %if.end %2 = load i32, i32* %a.addr, align 4 %cmp1 = icmp slt i32 %1, %2 br i1 %cmp1, label %for.body, label %for.end + ; USE: br i1 %cmp1, label %for.body, label %for.end, !prof ![[WEIGHTS1]] ; CHECK-LABEL: for.body: for.body: ; preds = %for.cond @@ -101,6 +117,7 @@ for.body: ; preds = %for.cond %rem2 = srem i32 %3, 3 %cmp3 = icmp eq i32 %rem2, 0 br i1 %cmp3, label %if.then4, label %if.else5 + ; USE: br i1 %cmp3, label %if.then4, label %if.else5, !prof ![[WEIGHTS0]] ; CHECK-LABEL: if.then4: if.then4: ; preds = %for.body @@ -113,6 +130,7 @@ if.else5: ; preds = %for.body %rem6 = srem i32 %4, 1001 %cmp7 = icmp eq i32 %rem6, 0 br i1 %cmp7, label %if.then8, label %if.end9 + ; USE: br i1 %cmp7, label %if.then8, label %if.end9, !prof ![[WEIGHTS1]] ; CHECK-LABEL: if.then8: if.then8: ; preds = %if.else5 @@ -147,4 +165,8 @@ return: ; preds = %for.end, %if.then8 declare i1 @choice() -; CHECK: declare void @llvm.instrprof.cover({{.*}}) +; GEN: declare void @llvm.instrprof.cover({{.*}}) + +; USE-DAG: ![[HOT]] = !{!"function_entry_count", i64 10000} +; USE-DAG: ![[WEIGHTS0]] = !{!"branch_weights", i32 1, i32 1} +; USE-DAG: ![[WEIGHTS1]] = !{!"branch_weights", i32 1, i32 0} diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll new file mode 100644 index 0000000000000000000000000000000000000000..84bb7a5830af2d7989fa7dd4f20781b6c56077e8 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll @@ -0,0 +1,161 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py UTC_ARGS: --version 5 + +; Tests that ICP compares vtables by checking IR. +; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP +; Require exactly one vtable candidate for each function candidate. Tests that ICP compares function by checking IR. +; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP +; On top of line 4, ignore 'Base1' and its derived types for vtable-based comparison. Tests that ICP compares functions. +; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -icp-ignored-base-types='Base1' -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@Base1 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0 +@Base2 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo] }, !type !2 +@Base3 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo] }, !type !6 + +@Derived1 = constant { [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived1_bar] }, !type !1, !type !2, !type !3 +@Derived2 = constant { [3 x ptr], [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo], [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived2_bar] }, !type !4, !type !5, !type !6, !type !7 +@Derived3 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0, !type !8 + +; 
VTABLE-CMP: remark: :0:0: Promote indirect call to Derived1_bar with count 600 out of 1600, sink 2 instruction(s) and compare 1 vtable(s): {Derived1} +; VTABLE-CMP: remark: :0:0: Promote indirect call to Derived2_bar with count 500 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {Derived2} +; VTABLE-CMP: remark: :0:0: Promote indirect call to Base1_bar with count 400 out of 500, sink 2 instruction(s) and compare 2 vtable(s): {Derived3, Base1} + +define void @test(ptr %d) { +; VTABLE-CMP-LABEL: define void @test( +; VTABLE-CMP-SAME: ptr [[D:%.*]]) { +; VTABLE-CMP-NEXT: [[ENTRY:.*:]] +; VTABLE-CMP-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF9:![0-9]+]] +; VTABLE-CMP-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"Base1") +; VTABLE-CMP-NEXT: tail call void @llvm.assume(i1 [[TMP0]]) +; VTABLE-CMP-NEXT: [[TMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived1, i32 40) +; VTABLE-CMP-NEXT: br i1 [[TMP1]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF10:![0-9]+]] +; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG]]: +; VTABLE-CMP-NEXT: call void @Derived1_bar(ptr [[D]]) +; VTABLE-CMP-NEXT: br label %[[IF_END_ICP:.*]] +; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT]]: +; VTABLE-CMP-NEXT: [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived2, i32 64) +; VTABLE-CMP-NEXT: br i1 [[TMP2]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF11:![0-9]+]] +; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG1]]: +; VTABLE-CMP-NEXT: call void @Derived2_bar(ptr [[D]]) +; VTABLE-CMP-NEXT: br label %[[IF_END_ICP3:.*]] +; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT2]]: +; VTABLE-CMP-NEXT: [[TMP3:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Base1, i32 16) +; VTABLE-CMP-NEXT: [[TMP4:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived3, i32 16) +; VTABLE-CMP-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; 
VTABLE-CMP-NEXT: br i1 [[TMP5]], label %[[IF_TRUE_DIRECT_TARG4:.*]], label %[[IF_FALSE_ORIG_INDIRECT5:.*]], !prof [[PROF12:![0-9]+]] +; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG4]]: +; VTABLE-CMP-NEXT: call void @Base1_bar(ptr [[D]]) +; VTABLE-CMP-NEXT: br label %[[IF_END_ICP6:.*]] +; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT5]]: +; VTABLE-CMP-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1 +; VTABLE-CMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VFN]], align 8 +; VTABLE-CMP-NEXT: call void [[TMP6]](ptr [[D]]) +; VTABLE-CMP-NEXT: br label %[[IF_END_ICP6]] +; VTABLE-CMP: [[IF_END_ICP6]]: +; VTABLE-CMP-NEXT: br label %[[IF_END_ICP3]] +; VTABLE-CMP: [[IF_END_ICP3]]: +; VTABLE-CMP-NEXT: br label %[[IF_END_ICP]] +; VTABLE-CMP: [[IF_END_ICP]]: +; VTABLE-CMP-NEXT: ret void +; +; FUNC-CMP-LABEL: define void @test( +; FUNC-CMP-SAME: ptr [[D:%.*]]) { +; FUNC-CMP-NEXT: [[ENTRY:.*:]] +; FUNC-CMP-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF9:![0-9]+]] +; FUNC-CMP-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"Base1") +; FUNC-CMP-NEXT: tail call void @llvm.assume(i1 [[TMP0]]) +; FUNC-CMP-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1 +; FUNC-CMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8 +; FUNC-CMP-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @Derived1_bar +; FUNC-CMP-NEXT: br i1 [[TMP2]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF10:![0-9]+]] +; FUNC-CMP: [[IF_TRUE_DIRECT_TARG]]: +; FUNC-CMP-NEXT: call void @Derived1_bar(ptr [[D]]) +; FUNC-CMP-NEXT: br label %[[IF_END_ICP:.*]] +; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT]]: +; FUNC-CMP-NEXT: [[TMP3:%.*]] = icmp eq ptr [[TMP1]], @Derived2_bar +; FUNC-CMP-NEXT: br i1 [[TMP3]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF11:![0-9]+]] +; FUNC-CMP: [[IF_TRUE_DIRECT_TARG1]]: +; FUNC-CMP-NEXT: call void @Derived2_bar(ptr [[D]]) +; FUNC-CMP-NEXT: br label %[[IF_END_ICP3:.*]] 
+; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT2]]: +; FUNC-CMP-NEXT: [[TMP4:%.*]] = icmp eq ptr [[TMP1]], @Base1_bar +; FUNC-CMP-NEXT: br i1 [[TMP4]], label %[[IF_TRUE_DIRECT_TARG4:.*]], label %[[IF_FALSE_ORIG_INDIRECT5:.*]], !prof [[PROF12:![0-9]+]] +; FUNC-CMP: [[IF_TRUE_DIRECT_TARG4]]: +; FUNC-CMP-NEXT: call void @Base1_bar(ptr [[D]]) +; FUNC-CMP-NEXT: br label %[[IF_END_ICP6:.*]] +; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT5]]: +; FUNC-CMP-NEXT: call void [[TMP1]](ptr [[D]]) +; FUNC-CMP-NEXT: br label %[[IF_END_ICP6]] +; FUNC-CMP: [[IF_END_ICP6]]: +; FUNC-CMP-NEXT: br label %[[IF_END_ICP3]] +; FUNC-CMP: [[IF_END_ICP3]]: +; FUNC-CMP-NEXT: br label %[[IF_END_ICP]] +; FUNC-CMP: [[IF_END_ICP]]: +; FUNC-CMP-NEXT: ret void +; +entry: + %vtable = load ptr, ptr %d, !prof !9 + %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"Base1") + tail call void @llvm.assume(i1 %0) + %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1 + %1 = load ptr, ptr %vfn + call void %1(ptr %d), !prof !10 + ret void +} + +define void @Base1_bar(ptr %this) { + ret void +} + +define void @Derived1_bar(ptr %this) { + ret void +} + +define void @Derived2_bar(ptr %this) { + ret void +} + + +declare i1 @llvm.type.test(ptr, metadata) +declare void @llvm.assume(i1) +declare i32 @Base2_foo(ptr) +declare i32 @Base1_foo(ptr) +declare void @Base3_foo(ptr) + +!llvm.module.flags = !{!11} +!0 = !{i64 16, !"Base1"} +!1 = !{i64 40, !"Base1"} +!2 = !{i64 16, !"Base2"} +!3 = !{i64 16, !"Derived1"} +!4 = !{i64 64, !"Base1"} +!5 = !{i64 40, !"Base2"} +!6 = !{i64 16, !"Base3"} +!7 = !{i64 16, !"Derived2"} +!8 = !{i64 16, !"Derived3"} +!9 = !{!"VP", i32 2, i64 1600, i64 -4123858694673519054, i64 600, i64 -7211198353767973908, i64 500, i64 -3574436251470806727, i64 200, i64 6288809125658696740, i64 200, i64 12345678, i64 100} +!10 = !{!"VP", i32 0, i64 1600, i64 3827408714133779784, i64 600, i64 5837445539218476403, i64 500, i64 -9064955852395570538, i64 400, i64 56781234, i64 100} + +!11 = !{i32 1, !"ProfileSummary", !12} 
+!12 = !{!13, !14, !15, !16, !17, !18, !19, !20} +!13 = !{!"ProfileFormat", !"InstrProf"} +!14 = !{!"TotalCount", i64 10000} +!15 = !{!"MaxCount", i64 10} +!16 = !{!"MaxInternalCount", i64 1} +!17 = !{!"MaxFunctionCount", i64 1000} +!18 = !{!"NumCounts", i64 3} +!19 = !{!"NumFunctions", i64 3} +!20 = !{!"DetailedSummary", !21} +!21 = !{!22, !23, !24} +!22 = !{i32 10000, i64 101, i32 1} +!23 = !{i32 990000, i64 101, i32 1} +!24 = !{i32 999999, i64 1, i32 2} + + +;. +; VTABLE-COMMON: [[PROF9]] = !{!"VP", i32 2, i64 100, i64 12345678, i64 100} +; VTABLE-COMMON: [[PROF10]] = !{!"branch_weights", i32 600, i32 1000} +; VTABLE-COMMON: [[PROF11]] = !{!"branch_weights", i32 500, i32 500} +; VTABLE-COMMON: [[PROF12]] = !{!"branch_weights", i32 400, i32 100} + diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll new file mode 100644 index 0000000000000000000000000000000000000000..6d3a6972f6885cfe14b453ff94bbe6cc9f977804 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll @@ -0,0 +1,125 @@ +; RUN: opt < %s -passes='pgo-icall-prom' -enable-vtable-profile-use -S | FileCheck %s --check-prefix=VTABLE + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base10get_ticketEv] }, !type !0, !type !1 +@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived10get_ticketEv] }, !type !0, !type !1, !type !2, !type !3 + +@.str = private constant [15 x i8] c"out of tickets\00" + +define i32 @test(ptr %b) personality ptr @__gxx_personality_v0 { +; VTABLE-LABEL: define i32 @test( +; VTABLE-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 { +; VTABLE-NEXT: [[ENTRY:.*:]] +; VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[B]], align 8 +; VTABLE-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr 
[[VTABLE]], metadata !"_ZTS4Base") +; VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP0]]) +; VTABLE-NEXT: [[TMP3:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV7Derived, i32 16) +; VTABLE-NEXT: br i1 [[TMP3]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF4:![0-9]+]] +; VTABLE: [[IF_TRUE_DIRECT_TARG]]: +; VTABLE-NEXT: [[TMP2:%.*]] = invoke i32 @_ZN7Derived10get_ticketEv(ptr [[B]]) +; VTABLE-NEXT: to label %[[IF_END_ICP:.*]] unwind label %[[LPAD:.*]] +; VTABLE: [[IF_FALSE_ORIG_INDIRECT]]: +; VTABLE-NEXT: [[TMP4:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV4Base, i32 16) +; VTABLE-NEXT: br i1 [[TMP4]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF5:![0-9]+]] +; VTABLE: [[IF_TRUE_DIRECT_TARG1]]: +; VTABLE-NEXT: [[TMP5:%.*]] = invoke i32 @_ZN4Base10get_ticketEv(ptr [[B]]) +; VTABLE-NEXT: to label %[[IF_END_ICP3:.*]] unwind label %[[LPAD]] +; VTABLE: [[IF_FALSE_ORIG_INDIRECT2]]: +; VTABLE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8 +; VTABLE-NEXT: [[CALL:%.*]] = invoke i32 [[TMP1]](ptr [[B]]) +; VTABLE-NEXT: to label %[[IF_END_ICP3]] unwind label %[[LPAD]] +; VTABLE: [[IF_END_ICP3]]: +; VTABLE-NEXT: [[TMP6:%.*]] = phi i32 [ [[CALL]], %[[IF_FALSE_ORIG_INDIRECT2]] ], [ [[TMP5]], %[[IF_TRUE_DIRECT_TARG1]] ] +; VTABLE-NEXT: br label %[[IF_END_ICP]] +; VTABLE: [[IF_END_ICP]]: +; VTABLE-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP6]], %[[IF_END_ICP3]] ], [ [[TMP2]], %[[IF_TRUE_DIRECT_TARG]] ] +; VTABLE-NEXT: br label %[[NEXT:.*]] +; VTABLE: [[NEXT]]: +; VTABLE-NEXT: ret i32 [[TMP7]] +; VTABLE: [[LPAD]]: +; VTABLE-NEXT: [[EXN:%.*]] = landingpad { ptr, i32 } +; VTABLE-NEXT: cleanup +; VTABLE-NEXT: unreachable +; +entry: + %vtable = load ptr, ptr %b, !prof !4 + %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base") + tail call void @llvm.assume(i1 %0) + %1 = load ptr, ptr %vtable + %call = invoke i32 %1(ptr %b) to label %next unwind 
label %lpad, !prof !5 + +next: + ret i32 %call + +lpad: + %exn = landingpad {ptr, i32} + cleanup + unreachable +} + +declare void @make_error(ptr, ptr, i32) +declare i32 @get_ticket_id() +declare ptr @__cxa_allocate_exception(i64) + +define i32 @_ZN4Base10get_ticketEv(ptr %this) personality ptr @__gxx_personality_v0 { +entry: + %call = tail call i32 @get_ticket_id() + %cmp.not = icmp eq i32 %call, -1 + br i1 %cmp.not, label %if.end, label %if.then + +if.then: + ret i32 %call + +if.end: + %exception = tail call ptr @__cxa_allocate_exception(i64 1) + invoke void @make_error(ptr %exception, ptr @.str, i32 1) + to label %invoke.cont unwind label %lpad + +invoke.cont: + unreachable + +lpad: + %0 = landingpad { ptr, i32 } + cleanup + resume { ptr, i32 } %0 +} + +define i32 @_ZN7Derived10get_ticketEv(ptr %this) personality ptr @__gxx_personality_v0 { +entry: + %call = tail call i32 @get_ticket_id() + %cmp.not = icmp eq i32 %call, -1 + br i1 %cmp.not, label %if.end, label %if.then + +if.then: + ret i32 %call + +if.end: + %exception = tail call ptr @__cxa_allocate_exception(i64 1) + invoke void @make_error(ptr %exception, ptr @.str, i32 2) + to label %invoke.cont unwind label %lpad + +invoke.cont: + unreachable + +lpad: + %0 = landingpad { ptr, i32 } + cleanup + resume { ptr, i32 } %0 +} + +declare i1 @llvm.type.test(ptr, metadata) +declare void @llvm.assume(i1) +declare i32 @__gxx_personality_v0(...) + +!0 = !{i64 16, !"_ZTS4Base"} +!1 = !{i64 16, !"_ZTSM4BaseFivE.virtual"} +!2 = !{i64 16, !"_ZTS7Derived"} +!3 = !{i64 16, !"_ZTSM7DerivedFivE.virtual"} +!4 = !{!"VP", i32 2, i64 1600, i64 13870436605473471591, i64 900, i64 1960855528937986108, i64 700} +!5 = !{!"VP", i32 0, i64 1600, i64 14811317294552474744, i64 900, i64 9261744921105590125, i64 700} + +; VTABLE: [[PROF4]] = !{!"branch_weights", i32 900, i32 700} +; VTABLE: [[PROF5]] = !{!"branch_weights", i32 700, i32 0} +;. 
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll new file mode 100644 index 0000000000000000000000000000000000000000..fb9ec0d0c85ff307c034f23c7aaf63084133653c --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll @@ -0,0 +1,68 @@ +; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -S 2>&1 | FileCheck %s --check-prefixes=VTABLE,REMARK + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; REMARK: remark: :0:0: Promote indirect call to _ZN7Derived5func1Eii with count 900 out of 1600, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV7Derived} +; REMARK: remark: :0:0: Promote indirect call to _ZN4Base5func1Eii with count 700 out of 700, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV4Base} + +@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived5func1Eii] }, !type !0, !type !1, !type !2, !type !3 +@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Eii] }, !type !0, !type !1 + +define i32 @test_tail_call(ptr %ptr, i32 %a, i32 %b) { +; VTABLE-LABEL: define i32 @test_tail_call( +; VTABLE-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) { +; VTABLE-NEXT: entry: +; VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[PTR]], align 8 +; VTABLE-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base") +; VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP0]]) +; VTABLE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV7Derived, i32 16) +; VTABLE-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[TMP4:%.*]], !prof [[PROF4:![0-9]+]] +; VTABLE: if.true.direct_targ: +; VTABLE-NEXT: [[TMP3:%.*]] = musttail call i32 @_ZN7Derived5func1Eii(ptr [[PTR]], i32 [[A]], i32 [[B]]) +; VTABLE-NEXT: ret 
i32 [[TMP3]] +; VTABLE: 3: +; VTABLE-NEXT: [[TMP4:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV4Base, i32 16) +; VTABLE-NEXT: br i1 [[TMP4]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[TMP7:%.*]], !prof [[PROF5:![0-9]+]] +; VTABLE: if.true.direct_targ1: +; VTABLE-NEXT: [[TMP6:%.*]] = musttail call i32 @_ZN4Base5func1Eii(ptr [[PTR]], i32 [[A]], i32 [[B]]) +; VTABLE-NEXT: ret i32 [[TMP6]] +; VTABLE: 6: +; VTABLE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8 +; VTABLE-NEXT: [[CALL:%.*]] = musttail call i32 [[TMP1]](ptr [[PTR]], i32 [[A]], i32 [[B]]) +; VTABLE-NEXT: ret i32 [[CALL]] +; +entry: + %vtable = load ptr, ptr %ptr, !prof !4 + %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base") + tail call void @llvm.assume(i1 %0) + %1 = load ptr, ptr %vtable + %call = musttail call i32 %1(ptr %ptr, i32 %a, i32 %b), !prof !5 + ret i32 %call +} + +declare i1 @llvm.type.test(ptr, metadata) +declare void @llvm.assume(i1) + +define i32 @_ZN7Derived5func1Eii(ptr %this, i32 %a, i32 %b) { +entry: + %sub = sub nsw i32 %a, %b + ret i32 %sub +} + +define i32 @_ZN4Base5func1Eii(ptr %this, i32 %a, i32 %b) { +entry: + %add = add nsw i32 %b, %a + ret i32 %add +} + + +!0 = !{i64 16, !"_ZTS4Base"} +!1 = !{i64 16, !"_ZTSM4BaseFiiiE.virtual"} +!2 = !{i64 16, !"_ZTS7Derived"} +!3 = !{i64 16, !"_ZTSM7DerivedFiiiE.virtual"} +!4 = !{!"VP", i32 2, i64 1600, i64 13870436605473471591, i64 900, i64 1960855528937986108, i64 700} +!5 = !{!"VP", i32 0, i64 1600, i64 7889036118036845314, i64 900, i64 10495086226207060333, i64 700} + +; VTABLE: [[PROF4]] = !{!"branch_weights", i32 900, i32 700} +; VTABLE: [[PROF5]] = !{!"branch_weights", i32 700, i32 0} diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll index 409f9502add770a9dbaa8aa8dd5688494b8f41e8..d45dcb6ea2848f1f581e03815779ea7557fba69a 100644 --- a/llvm/test/Transforms/PGOProfile/memprof.ll +++ b/llvm/test/Transforms/PGOProfile/memprof.ll @@ -1,3 +1,4 
@@ +; REQUIRES: disabled ;; Tests memprof profile matching (with and without instrumentation profiles). ;; Several requirements due to using raw profile inputs: diff --git a/llvm/test/Transforms/PGOProfile/vtable_prof_unsupported.ll b/llvm/test/Transforms/PGOProfile/vtable_prof_unsupported.ll new file mode 100644 index 0000000000000000000000000000000000000000..f72a20fdc71a6e87bf391e87e8e30678c5044f4f --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/vtable_prof_unsupported.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -passes=pgo-instr-gen -enable-vtable-value-profiling -S 2>&1 | FileCheck %s + +; Test that unsupported warning is emitted for non-ELF object files. +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx14.0.0" + +; CHECK: warning: {{.*}} VTable value profiling is presently not supported for non-ELF object formats + +@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base4funcEi] }, !type !0, !type !1 +@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived4funcEi] }, !type !0, !type !1, !type !2, !type !3 + +@llvm.compiler.used = appending global [2 x ptr] [ptr @_ZTV4Base, ptr @_ZTV7Derived], section "llvm.metadata" + +define i32 @_Z4funci(i32 %a) { +entry: + %call = call ptr @_Z10createTypev() + %vtable = load ptr, ptr %call + %0 = call i1 @llvm.public.type.test(ptr %vtable, metadata !"_ZTS7Derived") + call void @llvm.assume(i1 %0) + %1 = load ptr, ptr %vtable + %call1 = call i32 %1(ptr %call, i32 %a) + ret i32 %call1 +} + +declare ptr @_Z10createTypev() +declare i1 @llvm.public.type.test(ptr, metadata) +declare void @llvm.assume(i1) +declare i32 @_ZN4Base4funcEi(ptr, i32) +declare i32 @_ZN7Derived4funcEi(ptr , i32) + +!0 = !{i64 16, !"_ZTS4Base"} +!1 = !{i64 16, !"_ZTSM4BaseFiiE.virtual"} +!2 = !{i64 16, !"_ZTS7Derived"} +!3 = !{i64 16, !"_ZTSM7DerivedFiiE.virtual"} diff --git a/llvm/test/Transforms/PGOProfile/vtable_profile.ll 
b/llvm/test/Transforms/PGOProfile/vtable_profile.ll new file mode 100644 index 0000000000000000000000000000000000000000..93448b1785cd96c3094046aaa9569c1df7e363e4 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/vtable_profile.ll @@ -0,0 +1,98 @@ +; RUN: opt < %s -passes=pgo-instr-gen -enable-vtable-value-profiling -S 2>&1 | FileCheck %s --check-prefix=GEN --implicit-check-not="VTable value profiling is presently not supported" +; RUN: opt < %s -passes=pgo-instr-gen,instrprof -enable-vtable-value-profiling -S 2>&1 | FileCheck %s --check-prefix=LOWER --implicit-check-not="VTable value profiling is presently not supported" + +; __llvm_prf_vnm stores zlib-compressed vtable names. +; REQUIRES: zlib + +source_filename = "vtable_local.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; The test IR is generated based on the following C++ program. +; Base1 has external linkage and Base2 has local linkage. +; class Derived uses multiple inheritance so its virtual table +; global variable contains two vtables. func1 is loaded from +; the vtable compatible with class Base1, and func2 is loaded +; from the vtable compatible with class Base2. 
+ +; class Base1 { +; public: +; virtual int func1(int a) ; +; }; +; +; namespace { +; class Base2 { +; public: +; __attribute__((noinline)) virtual int func2(int a) { +; return a; +; } +; }; +; } + +; class Derived : public Base1, public Base2 { +; public: +; Derived(int c) : v(c) {} +; private: +; int v; +; }; +; +; Derived* createType(); + +; int func(int a) { +; Derived* d = createType(); +; return d->func2(a) + d->func1(a); +; } + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@_ZTV7Derived = constant { [3 x ptr], [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base15func1Ei], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr @_ZN12_GLOBAL__N_15Base25func2Ei] }, !type !0, !type !3, !type !6, !type !8, !type !10 +@_ZTV5Base1 = available_externally constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base15func1Ei] }, !type !0 +@_ZTVN12_GLOBAL__N_15Base2E = internal constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN12_GLOBAL__N_15Base25func2Ei] }, !type !11, !type !8; !vcall_visibility !12 +@llvm.compiler.used = appending global [1 x ptr] [ptr @_ZTV5Base1], section "llvm.metadata" + +; GEN: __llvm_profile_raw_version = comdat any +; GEN: __llvm_profile_raw_version = hidden constant i64 72057594037927946, comdat +; GEN: __profn__Z4funci = private constant [8 x i8] c"_Z4funci" + +; LOWER: $__profvt__ZTV7Derived = comdat nodeduplicate +; LOWER: $"__profvt_vtable_local.ll:_ZTVN12_GLOBAL__N_15Base2E" = comdat nodeduplicate +; LOWER: @__profvt__ZTV7Derived = global { i64, ptr, i32 } { i64 -4576307468236080025, ptr @_ZTV7Derived, i32 48 }, section "__llvm_prf_vtab", comdat, align 8 +; LOWER: @"__profvt_vtable_local.ll:_ZTVN12_GLOBAL__N_15Base2E" = internal global { i64, ptr, i32 } { i64 2756736352897687539, ptr @_ZTVN12_GLOBAL__N_15Base2E, i32 24 }, section "__llvm_prf_vtab", comdat, align 8 +; LOWER: @__llvm_prf_vnm = private 
constant [64 x i8] c"7>x\DA\8B\8F\0A\093wI-\CA,KMa,+IL\CAI\8D\CF\C9ON\CC\D1\CB\C9\B1\8A\07J\FA\19\1A\C5\BB\FB\F8;9\FA\C4\C7\FB\C5\1B\9A:%\16\A7\1A\B9\02\00\19\1F\12n", section "__llvm_prf_vns", align 1 +; LOWER: @llvm.used = appending global [5 x ptr] [ptr @__profvt__ZTV7Derived, ptr @"__profvt_vtable_local.ll:_ZTVN12_GLOBAL__N_15Base2E", ptr @__llvm_prf_vnodes, ptr @__llvm_prf_nm, ptr @__llvm_prf_vnm], section "llvm.metadata" + +define i32 @_Z4funci(i32 %a) { +entry: + %call = call ptr @_Z10createTypev() + %add.ptr = getelementptr inbounds i8, ptr %call, i64 8 + %vtable = load ptr, ptr %add.ptr +; GEN: [[P1:%[0-9]+]] = ptrtoint ptr %vtable to i64 +; GEN: call void @llvm.instrprof.value.profile(ptr @__profn__Z4funci, i64 [[CFGHash:[0-9]+]], i64 [[P1]], i32 2, i32 0) +; LOWER: [[P1:%[0-9]+]] = ptrtoint ptr %vtable to i64 +; LOWER: call void @__llvm_profile_instrument_target(i64 [[P1]], ptr @__profd__Z4funci, i32 2) + %vfunc1 = load ptr, ptr %vtable + %call1 = call i32 %vfunc1(ptr %add.ptr, i32 %a) + %vtable2 = load ptr, ptr %call +; GEN: [[P2:%[0-9]+]] = ptrtoint ptr %vtable2 to i64 +; GEN: call void @llvm.instrprof.value.profile(ptr @__profn__Z4funci, i64 [[CFGHash]], i64 [[P2]], i32 2, i32 1) +; LOWER: [[P2:%[0-9]+]] = ptrtoint ptr %vtable2 to i64 +; LOWER: call void @__llvm_profile_instrument_target(i64 [[P2]], ptr @__profd__Z4funci, i32 3) + %vfunc2 = load ptr, ptr %vtable2 + %call4 = call i32 %vfunc2(ptr %call, i32 %a) + %add = add nsw i32 %call1, %call4 + ret i32 %add +} + +declare ptr @_Z10createTypev() +declare i32 @_ZN12_GLOBAL__N_15Base25func2Ei(ptr %this, i32 %a) +declare i32 @_ZN5Base15func1Ei(ptr, i32) + +!0 = !{i64 16, !"_ZTS5Base1"} +!3 = !{i64 16, !"_ZTS7Derived"} +!6 = !{i64 40, !7} +!7 = distinct !{} +!8 = !{i64 16, !9} +!9 = distinct !{} +!10 = !{i64 40, !9} +!11 = !{i64 16, !7} diff --git a/llvm/test/tools/llvm-profdata/Inputs/basic.profraw b/llvm/test/tools/llvm-profdata/Inputs/basic.profraw index 
ad88759398c6020f4ab8a5606258e69d98e36687..1b284b84fad6dd7f9407b1c3b99cb178af0e09c6 100644 Binary files a/llvm/test/tools/llvm-profdata/Inputs/basic.profraw and b/llvm/test/tools/llvm-profdata/Inputs/basic.profraw differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/c-general.profraw b/llvm/test/tools/llvm-profdata/Inputs/c-general.profraw index bc8fc5db1cb154d98ca962e84313463e3298cb92..a3e884343942ebc70ba95ab4ee006630b6816d80 100644 Binary files a/llvm/test/tools/llvm-profdata/Inputs/c-general.profraw and b/llvm/test/tools/llvm-profdata/Inputs/c-general.profraw differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/compat.profdata.v10 b/llvm/test/tools/llvm-profdata/Inputs/compat.profdata.v10 new file mode 100644 index 0000000000000000000000000000000000000000..c331e30b48ff5d3be2efe4636d0c9fee56e764b5 Binary files /dev/null and b/llvm/test/tools/llvm-profdata/Inputs/compat.profdata.v10 differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/compressed.profraw b/llvm/test/tools/llvm-profdata/Inputs/compressed.profraw index 134b78f7af5b760dc3af7422c1bf7661f4bae14a..e3f77e870d4d20828119348e70eb44e6d39e0ec0 100644 Binary files a/llvm/test/tools/llvm-profdata/Inputs/compressed.profraw and b/llvm/test/tools/llvm-profdata/Inputs/compressed.profraw differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/thinlto_indirect_call_promotion.profraw b/llvm/test/tools/llvm-profdata/Inputs/thinlto_indirect_call_promotion.profraw new file mode 100644 index 0000000000000000000000000000000000000000..84707ba2070a92b8683010d9daaef747df35f9ac Binary files /dev/null and b/llvm/test/tools/llvm-profdata/Inputs/thinlto_indirect_call_promotion.profraw differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext b/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext new file mode 100644 index 0000000000000000000000000000000000000000..372f9f97b164547389a0cc736901c15ca2b722c4 --- /dev/null +++ 
b/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext @@ -0,0 +1,74 @@ +# IR level Instrumentation Flag +:ir +_Z10createTypei +# Func Hash: +146835647075900052 +# Num Counters: +2 +# Counter Values: +750 +250 + +_ZN8Derived15func1Eii +# Func Hash: +742261418966908927 +# Num Counters: +1 +# Counter Values: +250 + +_ZN8Derived15func2Eii +# Func Hash: +742261418966908927 +# Num Counters: +1 +# Counter Values: +250 + +main +# Func Hash: +1124236338992350536 +# Num Counters: +2 +# Counter Values: +1000 +1 +# Num Value Kinds: +2 +# ValueKind = IPVK_IndirectCallTarget: +0 +# NumValueSites: +2 +2 +vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii:750 +_ZN8Derived15func1Eii:250 +2 +vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii:750 +_ZN8Derived15func2Eii:250 +# ValueKind = IPVK_VTableTarget: +2 +# NumValueSites: +2 +2 +vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750 +_ZTV8Derived1:250 +2 +vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750 +_ZTV8Derived1:250 + +vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii +# Func Hash: +742261418966908927 +# Num Counters: +1 +# Counter Values: +750 + +vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii +# Func Hash: +742261418966908927 +# Num Counters: +1 +# Counter Values: +750 + diff --git a/llvm/test/tools/llvm-profdata/binary-ids-padding.test b/llvm/test/tools/llvm-profdata/binary-ids-padding.test index 67db5c98ef323a7546fe8a78add4ae6b7a7c6dd8..61881b69cfd5c07b95b36df98c452d19ab7371bd 100644 --- a/llvm/test/tools/llvm-profdata/binary-ids-padding.test +++ b/llvm/test/tools/llvm-profdata/binary-ids-padding.test @@ -5,13 +5,17 @@ // INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) // INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) +// INSTR_PROF_RAW_HEADER(uint64_t, NumBitmaskBytes, NumBitmaskBytes) // INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, 
(uintptr_t)CountersBegin) +// INSTR_PROF_RAW_HEADER(uint64_t, BitmaskDelta, (uintptr_t)BitmaskBegin) // INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) +// INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize) +// INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables) // INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) RUN: printf '\201rforpl\377' > %t.profraw -RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw // There will be 2 20-byte binary IDs, so the total Binary IDs size will be 64 bytes. // 2 * 8 binary ID sizes // + 2 * 20 binary IDs (of size 20) @@ -23,10 +27,15 @@ RUN: printf '\2\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\3\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\20\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw // Binary IDs - There are only two in this case that are 20 bytes. 
RUN: printf '\24\0\0\0\0\0\0\0' >> %t.profraw @@ -51,14 +60,18 @@ RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\067\265\035\031\112\165\023\344' >> %t.profraw RUN: printf '\02\0\0\0\0\0\0\0' >> %t.profraw -RUN: printf '\xd8\xff\3\0\1\0\0\0' >> %t.profraw +RUN: printf '\xc8\xff\3\0\1\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\02\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\023\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\067\0\0\0\0\0\0\0' >> %t.profraw diff --git a/llvm/test/tools/llvm-profdata/compat.proftext b/llvm/test/tools/llvm-profdata/compat.proftext index 73321cc5e66d462497a2ec3062ee0fff1c7798aa..111fd4198819748c5bf3c072fea9ac9b024dd70e 100644 --- a/llvm/test/tools/llvm-profdata/compat.proftext +++ b/llvm/test/tools/llvm-profdata/compat.proftext @@ -87,3 +87,26 @@ large_numbers # FORMATV4: Total functions: 3 # FORMATV4: Maximum function count: 2305843009213693952 # FORMATV4: Maximum internal block count: 1152921504606846976 + +# RUN: llvm-profdata show %S/Inputs/compat.profdata.v10 -all-functions --counts | FileCheck %s -check-prefix=FORMATV10 + +# FORMATV10: Counters: +# FORMATV10: large_numbers: +# FORMATV10: Hash: 0x3fffffffffffffff +# FORMATV10: Counters: 6 +# FORMATV10: Function count: 2305843009213693952 +# FORMATV10: Block counts: [1152921504606846976, 576460752303423488, 288230376151711744, 144115188075855872, 72057594037927936] +# FORMATV10: name with spaces: +# FORMATV10: Hash: 0x0000000000000400 +# FORMATV10: Counters: 2 +# FORMATV10: Function count: 0 +# FORMATV10: Block counts: [0] +# FORMATV10: 
function_count_only: +# FORMATV10: Hash: 0x0000000000000000 +# FORMATV10: Counters: 1 +# FORMATV10: Function count: 97531 +# FORMATV10: Block counts: [] +# FORMATV10: Functions shown: 3 +# FORMATV10: Total functions: 3 +# FORMATV10: Maximum function count: 2305843009213693952 +# FORMATV10: Maximum internal block count: 1152921504606846976 diff --git a/llvm/test/tools/llvm-profdata/large-binary-id-size.test b/llvm/test/tools/llvm-profdata/large-binary-id-size.test index 2394431e94de4824c706d295af267fd80b6692f6..316a9a4c9df4ce5c96186ab90f3b4b6e495579aa 100644 --- a/llvm/test/tools/llvm-profdata/large-binary-id-size.test +++ b/llvm/test/tools/llvm-profdata/large-binary-id-size.test @@ -1,5 +1,5 @@ RUN: printf '\201rforpl\377' > %t.profraw -RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\40\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw @@ -9,6 +9,11 @@ RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw // Check for a corrupted size being too large past the end of the file. 
RUN: printf '\7\7\7\7\7\7\7\7' >> %t.profraw diff --git a/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test b/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test index 06f418d0235d260d978a2550c74976e9309fa101..8b686d5c50cb748a98d99375a52ccac82e489867 100644 --- a/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test +++ b/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test @@ -5,22 +5,31 @@ // INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) // INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) +// INSTR_PROF_RAW_HEADER(uint64_t, NumBitmaskBytes, NumBitmaskBytes) // INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin) +// INSTR_PROF_RAW_HEADER(uint64_t, BitmaskDelta, (uintptr_t)BitmaskBegin) // INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) +// INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize) +// INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables) // INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) RUN: printf '\201rforpl\377' > %t.profraw -RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw // Data Section 
// @@ -35,7 +44,9 @@ RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\023\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\3\0foo\0\0\0' >> %t.profraw diff --git a/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test b/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test index b718cf0fd8e9723b4afab9c6c9f5c941ea1d3192..089afad420622397c561d020ceea3a691037277c 100644 --- a/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test +++ b/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test @@ -5,22 +5,31 @@ // INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) // INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) +// INSTR_PROF_RAW_HEADER(uint64_t, NumBitmaskBytes, NumBitmaskBytes) // INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin) +// INSTR_PROF_RAW_HEADER(uint64_t, BitmaskDelta, (uintptr_t)BitmaskBegin) // INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) +// INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize) +// INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables) // INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) RUN: printf '\201rforpl\377' > %t.profraw -RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> 
%t.profraw RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw // Data Section // @@ -35,11 +44,46 @@ RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw + +// Make two copies for another test. +RUN: cp %t.profraw %t-bad.profraw +RUN: cp %t.profraw %t-good.profraw + // Make NumCounters = 0 so that we get "number of counters is zero" error message RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\023\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\3\0foo\0\0\0' >> %t.profraw -RUN: not llvm-profdata show %t.profraw 2>&1 | FileCheck %s -CHECK: malformed instrumentation profile data: number of counters is zero +RUN: not llvm-profdata show %t.profraw 2>&1 | FileCheck %s --check-prefix=ZERO +ZERO: malformed instrumentation profile data: number of counters is zero + +// Test a counter value greater than 2^56. +RUN: printf '\1\0\0\0\0\0\0\0' >> %t-bad.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bad.profraw +// Counter value is 72057594037927937 +RUN: printf '\1\0\0\0\0\0\0\1' >> %t-bad.profraw +RUN: printf '\3\0foo\0\0\0' >> %t-bad.profraw + +RUN: printf '\1\0\0\0\0\0\0\0' >> %t-good.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-good.profraw +// Counter value is 1 +RUN: printf '\1\0\0\0\0\0\0\0' >> %t-good.profraw +RUN: printf '\3\0foo\0\0\0' >> %t-good.profraw + +// llvm-profdata fails if there is a warning for any input file under default failure mode (any). 
+RUN: not llvm-profdata merge %t-bad.profraw %t-good.profraw -o %t.profdata 2>&1 | FileCheck %s --check-prefix=ANY +ANY: {{.*}} excessively large counter value suggests corrupted profile data: 72057594037927937 + +// -failure-mode=all only fails if there is a warning for every input file. +RUN: not llvm-profdata merge %t-bad.profraw -failure-mode=all -o %t.profdata 2>&1 | FileCheck %s --check-prefix=ALL-ERR +ALL-ERR: {{.*}} excessively large counter value suggests corrupted profile data: 72057594037927937 + +RUN: llvm-profdata merge %t-bad.profraw %t-good.profraw -failure-mode=all -o %t.profdata 2>&1 | FileCheck %s --check-prefix=ALL-WARN +ALL-WARN: {{.*}} excessively large counter value suggests corrupted profile data: 72057594037927937 + +// -failure-mode=warn does not fail at all. It only prints warnings. +RUN: llvm-profdata merge %t-bad.profraw -failure-mode=warn -o %t.profdata 2>&1 | FileCheck %s --check-prefix=WARN +WARN: {{.*}} excessively large counter value suggests corrupted profile data: 72057594037927937 diff --git a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test index 38e40334a6a690d554eacc29af33efaa7525cd6f..e404ba4210cc14471625a762d9e2615e89e9ac76 100644 --- a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test +++ b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test @@ -5,22 +5,31 @@ // INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) // INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) +// INSTR_PROF_RAW_HEADER(uint64_t, NumBitmaskBytes, NumBitmaskBytes) // INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin) +// INSTR_PROF_RAW_HEADER(uint64_t, BitmaskDelta, (uintptr_t)BitmaskBegin) // INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) +// 
INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize) +// INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables) // INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) RUN: printf '\201rforpl\377' > %t.profraw -RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\2\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\6\0\1\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\6\0\2\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw // Data Section // @@ -38,10 +47,12 @@ RUN: printf '\02\0\0\0\0\0\0\0' >> %t.profraw // Octal '\11' is 9 in decimal: this should push CounterOffset to 1. As there are two counters, // the profile reader should error out. RUN: printf '\11\0\6\0\1\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\02\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw // Counter Section diff --git a/llvm/test/tools/llvm-profdata/mcdc-bitmap.test b/llvm/test/tools/llvm-profdata/mcdc-bitmap.test new file mode 100644 index 0000000000000000000000000000000000000000..a7b1b5df8c306c2247dd248ddf7729a198c5f824 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/mcdc-bitmap.test @@ -0,0 +1,201 @@ +# Test MC/DC bitmap reading and merging. + +# Merge as profdata. 
+RUN: split-file %s %t +RUN: llvm-profdata merge %t/mcdc-1.proftext %t/mcdc-2.proftext -o %t.profdata +RUN: llvm-profdata show %t.profdata --text -all-functions | FileCheck %s --check-prefix=MCDC +# Merge as proftext. +RUN: llvm-profdata merge %t/mcdc-1.proftext %t/mcdc-2.proftext -o %t.proftext +RUN: llvm-profdata show %t.proftext --text -all-functions | FileCheck %s --check-prefix=MCDC + +MCDC: # Num Bitmap Bytes: +MCDC-NEXT: $1 +MCDC-NEXT: # Bitmap Byte Values: +MCDC-NEXT: a +MCDC: # Num Bitmap Bytes: +MCDC-NEXT: $2 +MCDC-NEXT: # Bitmap Byte Values: +MCDC-NEXT: 0x29 +MCDC-NEXT: 0x0 + +# Merge as profdata. +RUN: llvm-profdata merge %t/mcdc-3.proftext %t/mcdc-4.proftext -o %t.profdata +RUN: llvm-profdata show %t.profdata --text -all-functions | FileCheck %s --check-prefix=MCDC2 +# Merge as proftext. +RUN: llvm-profdata merge %t/mcdc-3.proftext %t/mcdc-4.proftext -o %t.proftext +RUN: llvm-profdata show %t.proftext --text -all-functions | FileCheck %s --check-prefix=MCDC2 + +MCDC2: # Num Bitmap Bytes: +MCDC2-NEXT: $8 +MCDC2-NEXT: # Bitmap Byte Values: +MCDC2-NEXT: 0x1 +MCDC2-NEXT: 0x2 +MCDC2-NEXT: 0x3 +MCDC2-NEXT: 0xf +MCDC2-NEXT: 0xf +MCDC2-NEXT: 0xe +MCDC2-NEXT: 0xf +MCDC2-NEXT: 0xa + +# Incompatible size mismatch. +RUN: llvm-profdata merge %t/mcdc-2.proftext %t/mcdc-4.proftext -o %t.profdata 2>&1 | FileCheck %s --check-prefix=MCDC3 +# Merge as proftext +RUN: llvm-profdata merge %t/mcdc-2.proftext %t/mcdc-4.proftext -o %t.proftext 2>&1 | FileCheck %s --check-prefix=MCDC3 + +MCDC3: function bitmap size change detected (bitmap size mismatch) + +# Invalid number of bitmap bytes. +RUN: not llvm-profdata merge %t/mcdc-3.proftext %t/mcdc-err0.proftext -o %t.proftext 2>&1 | FileCheck %s --check-prefix=MCDC4 + +MCDC4: malformed instrumentation profile data: number of bitmap bytes is not a valid integer + +# Invalid bitmap byte. 
+RUN: not llvm-profdata merge %t/mcdc-3.proftext %t/mcdc-err1.proftext -o %t.proftext 2>&1 | FileCheck %s --check-prefix=MCDC5 + +MCDC5: malformed instrumentation profile data: bitmap byte is not a valid integer + +;--- mcdc-1.proftext +main +# Func Hash: +702755447896 +# Num Counters: +4 +# Counter Values: +1 +0 +1 +0 +# Num Bitmask Bytes: +$1 +# Bitmask Byte Values: +2 +;--- mcdc-2.proftext +main +# Func Hash: +702755447896 +# Num Counters: +4 +# Counter Values: +1 +1 +1 +1 +# Num Bitmask Bytes: +$1 +# Bitmask Byte Values: +8 + + +test3 +# Func Hash: +15288018065 +# Num Counters: +6 +# Counter Values: +4 +2 +1 +0 +0 +2 +# Num Bitmask Bytes: +$0x2 +# Bitmask Byte Values: +0x29 +0x0 +;--- mcdc-3.proftext +test3 +# Func Hash: +15288018065 +# Num Counters: +6 +# Counter Values: +4 +2 +1 +0 +0 +2 +# Num Bitmask Bytes: +$8 +# Bitmask Byte Values: +0x0 +0x2 +0x3 +0xf +0xf +0xa +0xc +0x2 +;--- mcdc-4.proftext +test3 +# Func Hash: +15288018065 +# Num Counters: +6 +# Counter Values: +4 +2 +1 +0 +0 +2 +# Num Bitmask Bytes: +$ 8 +# Bitmask Byte Values: +1 +2 +3 +4 +5 +6 +7 +8 +;--- mcdc-err0.proftext +test3 +# Func Hash: +15288018065 +# Num Counters: +6 +# Counter Values: +4 +2 +1 +0 +0 +2 +# Num Bitmask Bytes: +$8.9 +# Bitmask Byte Values: +1 +2 +3 +4 +5 +6 +7 +8 +;--- mcdc-err1.proftext +test3 +# Func Hash: +15288018065 +# Num Counters: +6 +# Counter Values: +4 +2 +1 +0 +0 +2 +# Num Bitmask Bytes: +$8 +# Bitmask Byte Values: +1 +2 +3 +4 +5.4 +6 +7 +8 diff --git a/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test b/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test index 171b5cc60878f4c8b686e8ca22650d6fe73a9f80..ee54bfb9785678bca991f2362f644e8fb97149b3 100644 --- a/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test +++ b/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test @@ -1,5 +1,5 @@ RUN: printf '\201rforpl\377' > %t.profraw -RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw // We 
should fail on this because the binary IDs is not a multiple of 8 bytes. RUN: printf '\77\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw @@ -10,6 +10,8 @@ RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw // Binary IDs - There are only two in this case that are 20 bytes. RUN: printf '\24\0\0\0\0\0\0\0' >> %t.profraw diff --git a/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test b/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test index 24f3f563e9689d6ef887e6d579bcd282a19d719b..dfa163f1f3439a95428ff047b87d4ba35813f3ad 100644 --- a/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test +++ b/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test @@ -1,19 +1,24 @@ // Magic RUN: printf '\377lprofr\201' > %t // Version -RUN: printf '\0\01\0\0\0\0\0\10' >> %t +RUN: printf '\0\0\0\0\10\0\0\10' >> %t // The rest of the header needs to be there to prevent a broken header error. 
RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\3' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\20' >> %t RUN: printf '\0\0\0\1\0\4\0\0' >> %t RUN: printf '\0\0\0\2\0\4\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: not llvm-profdata show %t -o /dev/null 2>&1 | FileCheck %s -CHECK: raw profile version mismatch: Profile uses raw profile format version = 281474976710664; expected version = {{[0-9]+}} +CHECK: raw profile version mismatch: Profile uses raw profile format version = 134217736; expected version = {{[0-9]+}} CHECK-NEXT: PLEASE update this tool to version in the raw profile, or regenerate raw profile with expected version. diff --git a/llvm/test/tools/llvm-profdata/raw-32-bits-be.test b/llvm/test/tools/llvm-profdata/raw-32-bits-be.test index c8e862009ef0284a84acc0d2f2131f50838138c3..63782c8b94d4a50748ef453fe2a04494f6bc46d0 100644 --- a/llvm/test/tools/llvm-profdata/raw-32-bits-be.test +++ b/llvm/test/tools/llvm-profdata/raw-32-bits-be.test @@ -1,37 +1,49 @@ +// Header RUN: printf '\377lprofR\201' > %t -RUN: printf '\0\0\0\0\0\0\0\10' >> %t +RUN: printf '\0\0\0\0\0\0\0\12' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\3' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\4' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\20' >> %t RUN: printf '\0\0\0\0\1\0\0\0' >> %t +RUN: printf '\0\0\0\0\3\0\0\0' >> %t RUN: printf '\0\0\0\0\2\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\134\370\302\114\333\030\275\254' >> %t RUN: 
printf '\0\0\0\0\0\0\0\1' >> %t RUN: printf '\1\0\0\0' >> %t +RUN: printf '\3\0\0\0' >> %t RUN: printf '\0\0\0\0' >> %t RUN: printf '\0\0\0\0' >> %t RUN: printf '\0\0\0\1' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\3' >> %t RUN: printf '\344\023\165\112\031\035\265\067' >> %t RUN: printf '\0\0\0\0\0\0\0\2' >> %t -RUN: printf '\0\xff\xff\xe0' >> %t +RUN: printf '\0\xff\xff\xd8' >> %t +RUN: printf '\2\xff\xff\xd3' >> %t RUN: printf '\0\0\0\0' >> %t RUN: printf '\0\0\0\0' >> %t RUN: printf '\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\1' >> %t RUN: printf '\0\0\0\0\0\0\0\023' >> %t RUN: printf '\0\0\0\0\0\0\0\067' >> %t RUN: printf '\0\0\0\0\0\0\0\101' >> %t +RUN: printf '\125\125\125\052' >> %t RUN: printf '\7\0foo\1bar\0\0\0\0\0\0\0' >> %t RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s +RUN: llvm-profdata show %t -all-functions -text | FileCheck %s -check-prefix=MCDC CHECK: Counters: CHECK: foo: @@ -48,3 +60,14 @@ CHECK: Functions shown: 2 CHECK: Total functions: 2 CHECK: Maximum function count: 55 CHECK: Maximum internal block count: 65 + +MCDC: Num Bitmap Bytes: +MCDC-NEXT: $3 +MCDC-NEXT: Bitmap Byte Values: +MCDC-NEXT: 55 +MCDC-NEXT: 55 +MCDC-NEXT: 55 +MCDC: Num Bitmap Bytes: +MCDC-NEXT: $1 +MCDC-NEXT: Bitmap Byte Values: +MCDC-NEXT: 0x2a diff --git a/llvm/test/tools/llvm-profdata/raw-32-bits-le.test b/llvm/test/tools/llvm-profdata/raw-32-bits-le.test index 523ff1ceb4807a3c83d0ac36cde2ecc1417f41c6..e9569bec1178bd3f2eca9df5c3a7e6c803370f10 100644 --- a/llvm/test/tools/llvm-profdata/raw-32-bits-le.test +++ b/llvm/test/tools/llvm-profdata/raw-32-bits-le.test @@ -1,37 +1,48 @@ RUN: printf '\201Rforpl\377' > %t -RUN: printf '\10\0\0\0\0\0\0\0' >> %t +RUN: printf '\12\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\2\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\3\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\4\0\0\0\0\0\0\0' 
>> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\20\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\1\0\0\0\0' >> %t +RUN: printf '\0\0\0\3\0\0\0\0' >> %t RUN: printf '\0\0\0\2\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\254\275\030\333\114\302\370\134' >> %t RUN: printf '\1\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\1' >> %t +RUN: printf '\0\0\0\3' >> %t RUN: printf '\0\0\0\0' >> %t RUN: printf '\0\0\0\0' >> %t RUN: printf '\1\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\3\0\0\0' >> %t RUN: printf '\067\265\035\031\112\165\023\344' >> %t RUN: printf '\02\0\0\0\0\0\0\0' >> %t -RUN: printf '\xe0\xff\xff\0' >> %t +RUN: printf '\xd8\xff\xff\0' >> %t +RUN: printf '\xd3\xff\xff\2' >> %t RUN: printf '\0\0\0\0' >> %t RUN: printf '\0\0\0\0' >> %t RUN: printf '\2\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\1\0\0\0' >> %t RUN: printf '\023\0\0\0\0\0\0\0' >> %t RUN: printf '\067\0\0\0\0\0\0\0' >> %t RUN: printf '\101\0\0\0\0\0\0\0' >> %t +RUN: printf '\125\125\125\052' >> %t RUN: printf '\7\0foo\1bar\0\0\0\0\0\0\0' >> %t RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s +RUN: llvm-profdata show %t -all-functions -text | FileCheck %s -check-prefix=MCDC CHECK: Counters: CHECK: foo: @@ -48,3 +59,14 @@ CHECK: Functions shown: 2 CHECK: Total functions: 2 CHECK: Maximum function count: 55 CHECK: Maximum internal block count: 65 + +MCDC: Num Bitmap Bytes: +MCDC-NEXT: $3 +MCDC-NEXT: Bitmap Byte Values: +MCDC-NEXT: 55 +MCDC-NEXT: 55 +MCDC-NEXT: 55 +MCDC: Num Bitmap Bytes: +MCDC-NEXT: $1 +MCDC-NEXT: Bitmap Byte Values: +MCDC-NEXT: 0x2a diff --git a/llvm/test/tools/llvm-profdata/raw-64-bits-be.test b/llvm/test/tools/llvm-profdata/raw-64-bits-be.test index b2b8b31dafbf5ac4703a04e5fdf974af2dd5f1ba..0bc579eec58abb8d88affa8d6dc03d9a65002df2 100644 --- a/llvm/test/tools/llvm-profdata/raw-64-bits-be.test +++ 
b/llvm/test/tools/llvm-profdata/raw-64-bits-be.test @@ -1,35 +1,48 @@ RUN: printf '\377lprofr\201' > %t -RUN: printf '\0\0\0\0\0\0\0\10' >> %t +RUN: printf '\0\0\0\0\0\0\0\12' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\3' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\4' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\20' >> %t RUN: printf '\0\0\0\1\0\4\0\0' >> %t +RUN: printf '\0\0\0\3\0\4\0\0' >> %t RUN: printf '\0\0\0\2\0\4\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\134\370\302\114\333\030\275\254' >> %t RUN: printf '\0\0\0\0\0\0\0\1' >> %t RUN: printf '\0\0\0\1\0\4\0\0' >> %t +RUN: printf '\0\0\0\3\0\4\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t -RUN: printf '\0\0\0\1\0\0\0\0' >> %t +RUN: printf '\0\0\0\1' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\3' >> %t RUN: printf '\344\023\165\112\031\035\265\067' >> %t RUN: printf '\0\0\0\0\0\0\0\02' >> %t -RUN: printf '\0\0\0\1\0\3\xff\xd8' >> %t +RUN: printf '\0\0\0\1\0\3\xff\xc8' >> %t +RUN: printf '\0\0\0\3\0\3\xff\xc3' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\02' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t -RUN: printf '\0\0\0\02\0\0\0\0' >> %t +RUN: printf '\0\0\0\1' >> %t RUN: printf '\0\0\0\0\0\0\0\023' >> %t RUN: printf '\0\0\0\0\0\0\0\067' >> %t RUN: printf '\0\0\0\0\0\0\0\101' >> %t +RUN: printf '\125\125\125\052' >> %t RUN: printf '\7\0foo\1bar\0\0\0\0\0\0\0' >> %t RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s +RUN: llvm-profdata show %t -all-functions -text | FileCheck %s -check-prefix=MCDC CHECK: Counters: CHECK: foo: @@ -46,3 +59,14 @@ CHECK: Functions shown: 2 CHECK: Total functions: 2 CHECK: Maximum function count: 55 
CHECK: Maximum internal block count: 65 + +MCDC: Num Bitmap Bytes: +MCDC-NEXT: $3 +MCDC-NEXT: Bitmap Byte Values: +MCDC-NEXT: 55 +MCDC-NEXT: 55 +MCDC-NEXT: 55 +MCDC: Num Bitmap Bytes: +MCDC-NEXT: $1 +MCDC-NEXT: Bitmap Byte Values: +MCDC-NEXT: 0x2a diff --git a/llvm/test/tools/llvm-profdata/raw-64-bits-le.test b/llvm/test/tools/llvm-profdata/raw-64-bits-le.test index 4e95798bc0afbdaa9ff7bca7ba796df49293da19..ca9ea54c3f0146e1c9dd72c73b22f488a5fc0bee 100644 --- a/llvm/test/tools/llvm-profdata/raw-64-bits-le.test +++ b/llvm/test/tools/llvm-profdata/raw-64-bits-le.test @@ -1,35 +1,48 @@ RUN: printf '\201rforpl\377' > %t -RUN: printf '\10\0\0\0\0\0\0\0' >> %t +RUN: printf '\12\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\2\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\3\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\4\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\20\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\4\0\1\0\0\0' >> %t +RUN: printf '\0\0\4\0\3\0\0\0' >> %t RUN: printf '\0\0\4\0\2\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\254\275\030\333\114\302\370\134' >> %t RUN: printf '\1\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\4\0\1\0\0\0' >> %t +RUN: printf '\0\0\4\0\3\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t -RUN: printf '\1\0\0\0\0\0\0\0' >> %t +RUN: printf '\1\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\3\0\0\0' >> %t RUN: printf '\067\265\035\031\112\165\023\344' >> %t RUN: printf '\02\0\0\0\0\0\0\0' >> %t -RUN: printf '\xd8\xff\3\0\1\0\0\0' >> %t +RUN: printf '\xc8\xff\3\0\1\0\0\0' >> %t +RUN: printf '\xc3\xff\3\0\3\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t -RUN: printf '\02\0\0\0\0\0\0\0' >> %t +RUN: printf '\02\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t 
+RUN: printf '\1\0\0\0' >> %t RUN: printf '\023\0\0\0\0\0\0\0' >> %t RUN: printf '\067\0\0\0\0\0\0\0' >> %t RUN: printf '\101\0\0\0\0\0\0\0' >> %t +RUN: printf '\125\125\125\052' >> %t RUN: printf '\7\0foo\1bar\0\0\0\0\0\0\0' >> %t RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s +RUN: llvm-profdata show %t -all-functions -text | FileCheck %s -check-prefix=MCDC CHECK: Counters: CHECK: foo: @@ -46,3 +59,14 @@ CHECK: Functions shown: 2 CHECK: Total functions: 2 CHECK: Maximum function count: 55 CHECK: Maximum internal block count: 65 + +MCDC: Num Bitmap Bytes: +MCDC-NEXT: $3 +MCDC-NEXT: Bitmap Byte Values: +MCDC-NEXT: 55 +MCDC-NEXT: 55 +MCDC-NEXT: 55 +MCDC: Num Bitmap Bytes: +MCDC-NEXT: $1 +MCDC-NEXT: Bitmap Byte Values: +MCDC-NEXT: 0x2a diff --git a/llvm/test/tools/llvm-profdata/raw-two-profiles.test b/llvm/test/tools/llvm-profdata/raw-two-profiles.test index 8d46c91e2732cd97ade334f7794a428227ec10da..70a4210dea9f84d7eb9dac74ea13ace3b7a960dc 100644 --- a/llvm/test/tools/llvm-profdata/raw-two-profiles.test +++ b/llvm/test/tools/llvm-profdata/raw-two-profiles.test @@ -1,43 +1,57 @@ RUN: printf '\201rforpl\377' > %t-foo.profraw -RUN: printf '\10\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\12\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\10\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\4\0\2\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\254\275\030\333\114\302\370\134' 
>> %t-foo.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\023\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\3\0foo\0\0\0' >> %t-foo.profraw RUN: printf '\201rforpl\377' > %t-bar.profraw -RUN: printf '\10\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\12\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\2\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\10\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\6\0\1\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\6\0\2\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\067\265\035\031\112\165\023\344' >> %t-bar.profraw RUN: printf '\02\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\6\0\1\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\02\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\067\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\101\0\0\0\0\0\0\0' >> %t-bar.profraw diff --git a/llvm/test/tools/llvm-profdata/vtable-value-prof.test b/llvm/test/tools/llvm-profdata/vtable-value-prof.test new file mode 100644 index 
0000000000000000000000000000000000000000..8dc8f6f0d480eea776445f691960648fd3d3dd17 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/vtable-value-prof.test @@ -0,0 +1,83 @@ +; RUN: rm -rf %t && mkdir %t && cd %t + +; Generate indexed profiles from text profiles +RUN: llvm-profdata merge --keep-vtable-symbols %S/Inputs/vtable-value-prof.proftext -o indexed.profdata + +; Show indexed profiles +RUN: llvm-profdata show --function=main --ic-targets --show-vtables indexed.profdata | FileCheck %s --check-prefix=INDEXED + +; Show text profiles +RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %S/Inputs/vtable-value-prof.proftext | FileCheck %s --check-prefix=ICTEXT + +; Convert indexed profiles to its textual output and show it. +RUN: llvm-profdata merge --keep-vtable-symbols --text -o text-from-indexed.proftext indexed.profdata +RUN: llvm-profdata show --function=main --ic-targets --show-vtables text-from-indexed.proftext | FileCheck %s --check-prefix=INDEXED +RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text text-from-indexed.proftext | FileCheck %s --check-prefix=ICTEXT + +INDEXED: Counters: +INDEXED-NEXT: main: +INDEXED-NEXT: Hash: 0x0f9a16fe6d398548 +INDEXED-NEXT: Counters: 2 +INDEXED-NEXT: Indirect Call Site Count: 2 +INDEXED-NEXT: Number of instrumented vtables: 2 +INDEXED-NEXT: Indirect Target Results: +INDEXED-NEXT: [ 0, {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%) +INDEXED-NEXT: [ 0, _ZN8Derived15func1Eii, 250 ] (25.00%) +INDEXED-NEXT: [ 1, {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%) +INDEXED-NEXT: [ 1, _ZN8Derived15func2Eii, 250 ] (25.00%) +INDEXED-NEXT: VTable Results: +INDEXED-NEXT: [ 0, {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +INDEXED-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%) +INDEXED-NEXT: [ 1, {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +INDEXED-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%) +INDEXED-NEXT: 
Instrumentation level: IR entry_first = 0 +INDEXED-NEXT: Functions shown: 1 +INDEXED-NEXT: Total functions: 6 +INDEXED-NEXT: Maximum function count: 1000 +INDEXED-NEXT: Maximum internal block count: 250 +INDEXED-NEXT: Statistics for indirect call sites profile: +INDEXED-NEXT: Total number of sites: 2 +INDEXED-NEXT: Total number of sites with values: 2 +INDEXED-NEXT: Total number of profiled values: 4 +INDEXED-NEXT: Value sites histogram: +INDEXED-NEXT: NumTargets, SiteCount +INDEXED-NEXT: 2, 2 +INDEXED-NEXT: Statistics for vtable profile: +INDEXED-NEXT: Total number of sites: 2 +INDEXED-NEXT: Total number of sites with values: 2 +INDEXED-NEXT: Total number of profiled values: 4 +INDEXED-NEXT: Value sites histogram: +INDEXED-NEXT: NumTargets, SiteCount +INDEXED-NEXT: 2, 2 + +ICTEXT: :ir +ICTEXT: main +ICTEXT: # Func Hash: +ICTEXT: 1124236338992350536 +ICTEXT: # Num Counters: +ICTEXT: 2 +ICTEXT: # Counter Values: +ICTEXT: 1000 +ICTEXT: 1 +ICTEXT: # Num Value Kinds: +ICTEXT: 2 +ICTEXT: # ValueKind = IPVK_IndirectCallTarget: +ICTEXT: 0 +ICTEXT: # NumValueSites: +ICTEXT: 2 +ICTEXT: 2 +ICTEXT: {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii:750 +ICTEXT: _ZN8Derived15func1Eii:250 +ICTEXT: 2 +ICTEXT: {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii:750 +ICTEXT: _ZN8Derived15func2Eii:250 +ICTEXT: # ValueKind = IPVK_VTableTarget: +ICTEXT: 2 +ICTEXT: # NumValueSites: +ICTEXT: 2 +ICTEXT: 2 +ICTEXT: {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750 +ICTEXT: _ZTV8Derived1:250 +ICTEXT: 2 +ICTEXT: {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750 +ICTEXT: _ZTV8Derived1:250 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index da10ddcc58c642fb5c8ba105e69f3ff5d8643da0..3d87d478a1142b955bf84ff765021113588dc937 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -46,10 +46,35 @@ #include using namespace llvm; +using ProfCorrelatorKind = 
InstrProfCorrelator::ProfCorrelatorKind; + +// https://llvm.org/docs/CommandGuide/llvm-profdata.html has documentation +// on each subcommand. +cl::SubCommand ShowSubcommand( + "show", + "Takes a profile data file and displays the profiles. See detailed " + "documentation in " + "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-show"); +cl::SubCommand OrderSubcommand( + "order", + "Reads temporal profiling traces from a profile and outputs a function " + "order that reduces the number of page faults for those traces. See " + "detailed documentation in " + "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-order"); +cl::SubCommand OverlapSubcommand( + "overlap", + "Computes and displays the overlap between two profiles. See detailed " + "documentation in " + "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-overlap"); +cl::SubCommand MergeSubcommand( + "merge", + "Takes several profiles and merges them together. See detailed " + "documentation in " + "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge"); -// We use this string to indicate that there are -// multiple static functions map to the same name. -const std::string DuplicateNameStr = "----"; +namespace { +enum ProfileKinds { instr, sample, memory }; +enum FailureMode { warnOnly, failIfAnyAreInvalid, failIfAllAreInvalid }; enum ProfileFormat { PF_None = 0, @@ -61,6 +86,340 @@ enum ProfileFormat { }; enum class ShowFormat { Text, Json, Yaml }; +} // namespace + +// Common options. +cl::opt OutputFilename("output", cl::value_desc("output"), + cl::init("-"), cl::desc("Output file"), + cl::sub(ShowSubcommand), + cl::sub(OrderSubcommand), + cl::sub(OverlapSubcommand), + cl::sub(MergeSubcommand)); +// NOTE: cl::alias must not have cl::sub(), since aliased option's cl::sub() +// will be used. llvm::cl::alias::done() method asserts this condition. 
+cl::alias OutputFilenameA("o", cl::desc("Alias for --output"), + cl::aliasopt(OutputFilename)); + +// Options common to at least two commands. +cl::opt ProfileKind( + cl::desc("Profile kind:"), cl::sub(MergeSubcommand), + cl::sub(OverlapSubcommand), cl::init(instr), + cl::values(clEnumVal(instr, "Instrumentation profile (default)"), + clEnumVal(sample, "Sample profile"))); +cl::opt Filename(cl::Positional, cl::desc(""), + cl::sub(ShowSubcommand), + cl::sub(OrderSubcommand)); +cl::opt MaxDbgCorrelationWarnings( + "max-debug-info-correlation-warnings", + cl::desc("The maximum number of warnings to emit when correlating " + "profile from debug info (0 = no limit)"), + cl::sub(MergeSubcommand), cl::sub(ShowSubcommand), cl::init(5)); +cl::opt ProfiledBinary( + "profiled-binary", cl::init(""), + cl::desc("Path to binary from which the profile was collected."), + cl::sub(ShowSubcommand), cl::sub(MergeSubcommand)); +cl::opt DebugInfoFilename( + "debug-info", cl::init(""), + cl::desc( + "For show, read and extract profile metadata from debug info and show " + "the functions it found. For merge, use the provided debug info to " + "correlate the raw profile."), + cl::sub(ShowSubcommand), cl::sub(MergeSubcommand)); +cl::opt + BinaryFilename("binary-file", cl::init(""), + cl::desc("For merge, use the provided unstripped bianry to " + "correlate the raw profile."), + cl::sub(MergeSubcommand)); +cl::opt FuncNameFilter( + "function", + cl::desc("Details for matching functions. For overlapping CSSPGO, this " + "takes a function name with calling context."), + cl::sub(ShowSubcommand), cl::sub(OverlapSubcommand)); + +// TODO: Consider creating a template class (e.g., MergeOption, ShowOption) to +// factor out the common cl::sub in cl::opt constructor for subcommand-specific +// options. + +// Options specific to merge subcommand. 
+cl::list InputFilenames(cl::Positional, cl::sub(MergeSubcommand), + cl::desc("")); +cl::list WeightedInputFilenames("weighted-input", + cl::sub(MergeSubcommand), + cl::desc(",")); +cl::opt OutputFormat( + cl::desc("Format of output profile"), cl::sub(MergeSubcommand), + cl::init(PF_Ext_Binary), + cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding"), + clEnumValN(PF_Ext_Binary, "extbinary", + "Extensible binary encoding " + "(default)"), + clEnumValN(PF_Text, "text", "Text encoding"), + clEnumValN(PF_GCC, "gcc", + "GCC encoding (only meaningful for -sample)"))); +cl::opt + InputFilenamesFile("input-files", cl::init(""), cl::sub(MergeSubcommand), + cl::desc("Path to file containing newline-separated " + "[,] entries")); +cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"), + cl::aliasopt(InputFilenamesFile)); +cl::opt DumpInputFileList( + "dump-input-file-list", cl::init(false), cl::Hidden, + cl::sub(MergeSubcommand), + cl::desc("Dump the list of input files and their weights, then exit")); +cl::opt RemappingFile("remapping-file", cl::value_desc("file"), + cl::sub(MergeSubcommand), + cl::desc("Symbol remapping file")); +cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"), + cl::aliasopt(RemappingFile)); +cl::opt + UseMD5("use-md5", cl::init(false), cl::Hidden, + cl::desc("Choose to use MD5 to represent string in name table (only " + "meaningful for -extbinary)"), + cl::sub(MergeSubcommand)); +cl::opt CompressAllSections( + "compress-all-sections", cl::init(false), cl::Hidden, + cl::sub(MergeSubcommand), + cl::desc("Compress all sections when writing the profile (only " + "meaningful for -extbinary)")); +cl::opt SampleMergeColdContext( + "sample-merge-cold-context", cl::init(false), cl::Hidden, + cl::sub(MergeSubcommand), + cl::desc( + "Merge context sample profiles whose count is below cold threshold")); +cl::opt SampleTrimColdContext( + "sample-trim-cold-context", cl::init(false), cl::Hidden, + cl::sub(MergeSubcommand), 
+ cl::desc( + "Trim context sample profiles whose count is below cold threshold")); +cl::opt SampleColdContextFrameDepth( + "sample-frame-depth-for-cold-context", cl::init(1), + cl::sub(MergeSubcommand), + cl::desc("Keep the last K frames while merging cold profile. 1 means the " + "context-less base profile")); +cl::opt OutputSizeLimit( + "output-size-limit", cl::init(0), cl::Hidden, cl::sub(MergeSubcommand), + cl::desc("Trim cold functions until profile size is below specified " + "limit in bytes. This uses a heursitic and functions may be " + "excessively trimmed")); +cl::opt GenPartialProfile( + "gen-partial-profile", cl::init(false), cl::Hidden, + cl::sub(MergeSubcommand), + cl::desc("Generate a partial profile (only meaningful for -extbinary)")); +cl::opt SupplInstrWithSample( + "supplement-instr-with-sample", cl::init(""), cl::Hidden, + cl::sub(MergeSubcommand), + cl::desc("Supplement an instr profile with sample profile, to correct " + "the profile unrepresentativeness issue. The sample " + "profile is the input of the flag. 
Output will be in instr " + "format (The flag only works with -instr)")); +cl::opt ZeroCounterThreshold( + "zero-counter-threshold", cl::init(0.7), cl::Hidden, + cl::sub(MergeSubcommand), + cl::desc("For the function which is cold in instr profile but hot in " + "sample profile, if the ratio of the number of zero counters " + "divided by the total number of counters is above the " + "threshold, the profile of the function will be regarded as " + "being harmful for performance and will be dropped.")); +cl::opt SupplMinSizeThreshold( + "suppl-min-size-threshold", cl::init(10), cl::Hidden, + cl::sub(MergeSubcommand), + cl::desc("If the size of a function is smaller than the threshold, " + "assume it can be inlined by PGO early inliner and it won't " + "be adjusted based on sample profile.")); +cl::opt InstrProfColdThreshold( + "instr-prof-cold-threshold", cl::init(0), cl::Hidden, + cl::sub(MergeSubcommand), + cl::desc("User specified cold threshold for instr profile which will " + "override the cold threshold got from profile summary. ")); +// WARNING: This reservoir size value is propagated to any input indexed +// profiles for simplicity. Changing this value between invocations could +// result in sample bias. 
+cl::opt TemporalProfTraceReservoirSize( + "temporal-profile-trace-reservoir-size", cl::init(100), + cl::sub(MergeSubcommand), + cl::desc("The maximum number of stored temporal profile traces (default: " + "100)")); +cl::opt TemporalProfMaxTraceLength( + "temporal-profile-max-trace-length", cl::init(10000), + cl::sub(MergeSubcommand), + cl::desc("The maximum length of a single temporal profile trace " + "(default: 10000)")); + +cl::opt + FailMode("failure-mode", cl::init(failIfAnyAreInvalid), + cl::desc("Failure mode:"), cl::sub(MergeSubcommand), + cl::values(clEnumValN(warnOnly, "warn", + "Do not fail and just print warnings."), + clEnumValN(failIfAnyAreInvalid, "any", + "Fail if any profile is invalid."), + clEnumValN(failIfAllAreInvalid, "all", + "Fail only if all profiles are invalid."))); + +cl::opt OutputSparse( + "sparse", cl::init(false), cl::sub(MergeSubcommand), + cl::desc("Generate a sparse profile (only meaningful for -instr)")); +cl::opt NumThreads( + "num-threads", cl::init(0), cl::sub(MergeSubcommand), + cl::desc("Number of merge threads to use (default: autodetect)")); +cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"), + cl::aliasopt(NumThreads)); + +cl::opt ProfileSymbolListFile( + "prof-sym-list", cl::init(""), cl::sub(MergeSubcommand), + cl::desc("Path to file containing the list of function symbols " + "used to populate profile symbol list")); + +cl::opt ProfileLayout( + "convert-sample-profile-layout", + cl::desc("Convert the generated profile to a profile with a new layout"), + cl::sub(MergeSubcommand), cl::init(SPL_None), + cl::values( + clEnumValN(SPL_Nest, "nest", + "Nested profile, the input should be CS flat profile"), + clEnumValN(SPL_Flat, "flat", + "Profile with nested inlinee flatten out"))); + +cl::opt DropProfileSymbolList( + "drop-profile-symbol-list", cl::init(false), cl::Hidden, + cl::sub(MergeSubcommand), + cl::desc("Drop the profile symbol list when merging AutoFDO profiles " + "(only meaningful for -sample)")); + 
+cl::opt KeepVTableSymbols( + "keep-vtable-symbols", cl::init(false), cl::Hidden, + cl::sub(MergeSubcommand), + cl::desc("If true, keep the vtable symbols in indexed profiles")); + +// Options specific to overlap subcommand. +cl::opt BaseFilename(cl::Positional, cl::Required, + cl::desc(""), + cl::sub(OverlapSubcommand)); +cl::opt TestFilename(cl::Positional, cl::Required, + cl::desc(""), + cl::sub(OverlapSubcommand)); + +cl::opt SimilarityCutoff( + "similarity-cutoff", cl::init(0), + cl::desc("For sample profiles, list function names (with calling context " + "for csspgo) for overlapped functions " + "with similarities below the cutoff (percentage times 10000)."), + cl::sub(OverlapSubcommand)); + +cl::opt IsCS( + "cs", cl::init(false), + cl::desc("For context sensitive PGO counts. Does not work with CSSPGO."), + cl::sub(OverlapSubcommand)); + +cl::opt OverlapValueCutoff( + "value-cutoff", cl::init(-1), + cl::desc( + "Function level overlap information for every function (with calling " + "context for csspgo) in test " + "profile with max count value greater then the parameter value"), + cl::sub(OverlapSubcommand)); + +// Options unique to show subcommand. +cl::opt ShowCounts("counts", cl::init(false), + cl::desc("Show counter values for shown functions"), + cl::sub(ShowSubcommand)); +cl::opt + SFormat("show-format", cl::init(ShowFormat::Text), + cl::desc("Emit output in the selected format if supported"), + cl::sub(ShowSubcommand), + cl::values(clEnumValN(ShowFormat::Text, "text", + "emit normal text output (default)"), + clEnumValN(ShowFormat::Json, "json", "emit JSON"), + clEnumValN(ShowFormat::Yaml, "yaml", "emit YAML"))); +// TODO: Consider replacing this with `--show-format=text-encoding`. 
+cl::opt + TextFormat("text", cl::init(false), + cl::desc("Show instr profile data in text dump format"), + cl::sub(ShowSubcommand)); +cl::opt + JsonFormat("json", + cl::desc("Show sample profile data in the JSON format " + "(deprecated, please use --show-format=json)"), + cl::sub(ShowSubcommand)); +cl::opt ShowIndirectCallTargets( + "ic-targets", cl::init(false), + cl::desc("Show indirect call site target values for shown functions"), + cl::sub(ShowSubcommand)); +cl::opt ShowVTables("show-vtables", cl::init(false), + cl::desc("Show vtable names for shown functions"), + cl::sub(ShowSubcommand)); +cl::opt ShowMemOPSizes( + "memop-sizes", cl::init(false), + cl::desc("Show the profiled sizes of the memory intrinsic calls " + "for shown functions"), + cl::sub(ShowSubcommand)); +cl::opt ShowDetailedSummary("detailed-summary", cl::init(false), + cl::desc("Show detailed profile summary"), + cl::sub(ShowSubcommand)); +cl::list DetailedSummaryCutoffs( + cl::CommaSeparated, "detailed-summary-cutoffs", + cl::desc( + "Cutoff percentages (times 10000) for generating detailed summary"), + cl::value_desc("800000,901000,999999"), cl::sub(ShowSubcommand)); +cl::opt + ShowHotFuncList("hot-func-list", cl::init(false), + cl::desc("Show profile summary of a list of hot functions"), + cl::sub(ShowSubcommand)); +cl::opt ShowAllFunctions("all-functions", cl::init(false), + cl::desc("Details for each and every function"), + cl::sub(ShowSubcommand)); +cl::opt ShowCS("showcs", cl::init(false), + cl::desc("Show context sensitive counts"), + cl::sub(ShowSubcommand)); +cl::opt ShowProfileKind( + cl::desc("Profile kind supported by show:"), cl::sub(ShowSubcommand), + cl::init(instr), + cl::values(clEnumVal(instr, "Instrumentation profile (default)"), + clEnumVal(sample, "Sample profile"), + clEnumVal(memory, "MemProf memory access profile"))); +cl::opt TopNFunctions( + "topn", cl::init(0), + cl::desc("Show the list of functions with the largest internal counts"), + cl::sub(ShowSubcommand)); 
+cl::opt ShowValueCutoff( + "value-cutoff", cl::init(0), + cl::desc("Set the count value cutoff. Functions with the maximum count " + "less than this value will not be printed out. (Default is 0)"), + cl::sub(ShowSubcommand)); +cl::opt OnlyListBelow( + "list-below-cutoff", cl::init(false), + cl::desc("Only output names of functions whose max count values are " + "below the cutoff value"), + cl::sub(ShowSubcommand)); +cl::opt ShowProfileSymbolList( + "show-prof-sym-list", cl::init(false), + cl::desc("Show profile symbol list if it exists in the profile. "), + cl::sub(ShowSubcommand)); +cl::opt ShowSectionInfoOnly( + "show-sec-info-only", cl::init(false), + cl::desc("Show the information of each section in the sample profile. " + "The flag is only usable when the sample profile is in " + "extbinary format"), + cl::sub(ShowSubcommand)); +cl::opt ShowBinaryIds("binary-ids", cl::init(false), + cl::desc("Show binary ids in the profile. "), + cl::sub(ShowSubcommand)); +cl::opt ShowTemporalProfTraces( + "temporal-profile-traces", + cl::desc("Show temporal profile traces in the profile."), + cl::sub(ShowSubcommand)); + +cl::opt + ShowCovered("covered", cl::init(false), + cl::desc("Show only the functions that have been executed."), + cl::sub(ShowSubcommand)); + +cl::opt ShowProfileVersion("profile-version", cl::init(false), + cl::desc("Show profile version. "), + cl::sub(ShowSubcommand)); + +// We use this string to indicate that there are +// multiple static functions map to the same name. 
+const std::string DuplicateNameStr = "----"; static void warn(Twine Message, std::string Whence = "", std::string Hint = "") { @@ -112,11 +471,6 @@ static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") { exitWithError(EC.message(), std::string(Whence)); } -namespace { -enum ProfileKinds { instr, sample, memory }; -enum FailureMode { failIfAnyAreInvalid, failIfAllAreInvalid }; -} - static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC, StringRef Whence = "") { if (FailMode == failIfAnyAreInvalid) @@ -254,7 +608,8 @@ static void overlapInput(const std::string &BaseFilename, /// Load an input into a writer context. static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, const InstrProfCorrelator *Correlator, - const StringRef ProfiledBinary, WriterContext *WC) { + const StringRef ProfiledBinary, WriterContext *WC, + const bool KeepVTableSymbols) { std::unique_lock CtxGuard{WC->Lock}; // Copy the filename, because llvm::ThreadPool copied the input "const @@ -306,7 +661,22 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, } auto FS = vfs::getRealFileSystem(); - auto ReaderOrErr = InstrProfReader::create(Input.Filename, *FS, Correlator); + // TODO: This only saves the first non-fatal error from InstrProfReader, and + // then added to WriterContext::Errors. However, this is not extensible, if + // we have more non-fatal errors from InstrProfReader in the future. How + // should this interact with different -failure-mode? + std::optional> ReaderWarning; + auto Warn = [&](Error E) { + if (ReaderWarning) { + consumeError(std::move(E)); + return; + } + // Only show the first time an error occurs in this file. 
+ auto [ErrCode, Msg] = InstrProfError::take(std::move(E)); + ReaderWarning = {make_error(ErrCode, Msg), Filename}; + }; + auto ReaderOrErr = + InstrProfReader::create(Input.Filename, *FS, Correlator, Warn); if (Error E = ReaderOrErr.takeError()) { // Skip the empty profiles by returning silently. auto [ErrCode, Msg] = InstrProfError::take(std::move(E)); @@ -347,6 +717,14 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, }); } + if (KeepVTableSymbols) { + const InstrProfSymtab &symtab = Reader->getSymtab(); + const auto &VTableNames = symtab.getVTableNames(); + + for (const auto &kv : VTableNames) + WC->Writer.addVTableName(kv.getKey()); + } + if (Reader->hasTemporalProfile()) { auto &Traces = Reader->getTemporalProfTraces(Input.Weight); if (!Traces.empty()) @@ -354,14 +732,23 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, Traces, Reader->getTemporalProfTraceStreamSize()); } if (Reader->hasError()) { - if (Error E = Reader->getError()) + if (Error E = Reader->getError()) { WC->Errors.emplace_back(std::move(E), Filename); + return; + } } std::vector BinaryIds; - if (Error E = Reader->readBinaryIds(BinaryIds)) + if (Error E = Reader->readBinaryIds(BinaryIds)) { WC->Errors.emplace_back(std::move(E), Filename); + return; + } WC->Writer.addBinaryIds(BinaryIds); + + if (ReaderWarning) { + WC->Errors.emplace_back(std::move(ReaderWarning->first), + ReaderWarning->second); + } } /// Merge the \p Src writer context into \p Dst. 
@@ -409,20 +796,30 @@ mergeInstrProfile(const WeightedFileVector &Inputs, StringRef DebugInfoFilename, ProfileFormat OutputFormat, uint64_t TraceReservoirSize, uint64_t MaxTraceLength, bool OutputSparse, unsigned NumThreads, FailureMode FailMode, - const StringRef ProfiledBinary) { + const StringRef ProfiledBinary, + const bool KeepVTableSymbols) { if (OutputFormat == PF_Compact_Binary) exitWithError("Compact Binary is deprecated"); if (OutputFormat != PF_Binary && OutputFormat != PF_Ext_Binary && OutputFormat != PF_Text) exitWithError("unknown format is specified"); - std::unique_ptr Correlator; + // TODO: Maybe we should support correlation with mixture of different + // correlation modes(w/wo debug-info/object correlation). + std::string CorrelateFilename; + ProfCorrelatorKind CorrelateKind = ProfCorrelatorKind::NONE; if (!DebugInfoFilename.empty()) { - if (auto Err = - InstrProfCorrelator::get(DebugInfoFilename).moveInto(Correlator)) - exitWithError(std::move(Err), DebugInfoFilename); - if (auto Err = Correlator->correlateProfileData()) - exitWithError(std::move(Err), DebugInfoFilename); + CorrelateFilename = DebugInfoFilename; + CorrelateKind = ProfCorrelatorKind::DEBUG_INFO; + } + + std::unique_ptr Correlator; + if (CorrelateKind != InstrProfCorrelator::NONE) { + if (auto Err = InstrProfCorrelator::get(CorrelateFilename, CorrelateKind) + .moveInto(Correlator)) + exitWithError(std::move(Err), CorrelateFilename); + if (auto Err = Correlator->correlateProfileData(MaxDbgCorrelationWarnings)) + exitWithError(std::move(Err), CorrelateFilename); } std::mutex ErrorLock; @@ -443,7 +840,7 @@ mergeInstrProfile(const WeightedFileVector &Inputs, StringRef DebugInfoFilename, if (NumThreads == 1) { for (const auto &Input : Inputs) loadInput(Input, Remapper, Correlator.get(), ProfiledBinary, - Contexts[0].get()); + Contexts[0].get(), KeepVTableSymbols); } else { ThreadPool Pool(hardware_concurrency(NumThreads)); @@ -451,7 +848,7 @@ mergeInstrProfile(const WeightedFileVector 
&Inputs, StringRef DebugInfoFilename, unsigned Ctx = 0; for (const auto &Input : Inputs) { Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary, - Contexts[Ctx].get()); + Contexts[Ctx].get(), KeepVTableSymbols); Ctx = (Ctx + 1) % NumThreads; } Pool.wait(); @@ -484,7 +881,7 @@ mergeInstrProfile(const WeightedFileVector &Inputs, StringRef DebugInfoFilename, warn(toString(std::move(ErrorPair.first)), ErrorPair.second); } } - if (NumErrors == Inputs.size() || + if ((NumErrors == Inputs.size() && FailMode == failIfAllAreInvalid) || (NumErrors > 0 && FailMode == failIfAnyAreInvalid)) exitWithError("no profile can be merged"); @@ -844,7 +1241,7 @@ static void supplementInstrProfile( const WeightedFileVector &Inputs, StringRef SampleFilename, StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse, unsigned SupplMinSizeThreshold, float ZeroCounterThreshold, - unsigned InstrProfColdThreshold) { + unsigned InstrProfColdThreshold, const bool KeepVTableSymbols) { if (OutputFilename.compare("-") == 0) exitWithError("cannot write indexed profdata format to stdout"); if (Inputs.size() != 1) @@ -870,7 +1267,8 @@ static void supplementInstrProfile( SmallSet WriterErrorCodes; auto WC = std::make_unique(OutputSparse, ErrorLock, WriterErrorCodes); - loadInput(Inputs[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC.get()); + loadInput(Inputs[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC.get(), + KeepVTableSymbols); if (WC->Errors.size() > 0) exitWithError(std::move(WC->Errors[0].first), InstrFilename); @@ -1193,10 +1591,12 @@ static int merge_main(int argc, const char *argv[]) { "GCC encoding (only meaningful for -sample)"))); cl::opt FailureMode( "failure-mode", cl::init(failIfAnyAreInvalid), cl::desc("Failure mode:"), - cl::values(clEnumValN(failIfAnyAreInvalid, "any", - "Fail if any profile is invalid."), - clEnumValN(failIfAllAreInvalid, "all", - "Fail only if all profiles are invalid."))); + cl::values( + clEnumValN(warnOnly, "warn", "Do not 
fail and just print warnings."), + clEnumValN(failIfAnyAreInvalid, "any", + "Fail if any profile is invalid."), + clEnumValN(failIfAllAreInvalid, "all", + "Fail only if all profiles are invalid."))); cl::opt OutputSparse("sparse", cl::init(false), cl::desc("Generate a sparse profile (only meaningful for -instr)")); cl::opt NumThreads( @@ -1277,6 +1677,9 @@ static int merge_main(int argc, const char *argv[]) { "drop-profile-symbol-list", cl::init(false), cl::Hidden, cl::desc("Drop the profile symbol list when merging AutoFDO profiles " "(only meaningful for -sample)")); + cl::opt KeepVTableSymbols( + "keep-vtable-symbols", cl::init(false), cl::Hidden, + cl::desc("If true, keep the vtable symbols in indexed profiles")); // WARNING: This reservoir size value is propagated to any input indexed // profiles for simplicity. Changing this value between invocations could // result in sample bias. @@ -1323,7 +1726,8 @@ static int merge_main(int argc, const char *argv[]) { supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputFilename, OutputFormat, OutputSparse, SupplMinSizeThreshold, - ZeroCounterThreshold, InstrProfColdThreshold); + ZeroCounterThreshold, InstrProfColdThreshold, + KeepVTableSymbols); return 0; } @@ -1332,7 +1736,7 @@ static int merge_main(int argc, const char *argv[]) { OutputFilename, OutputFormat, TemporalProfTraceReservoirSize, TemporalProfMaxTraceLength, OutputSparse, NumThreads, - FailureMode, ProfiledBinary); + FailureMode, ProfiledBinary, KeepVTableSymbols); else mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, ProfileSymbolListFile, CompressAllSections, @@ -1364,7 +1768,8 @@ static void overlapInstrProfile(const std::string &BaseFilename, OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n"; exit(0); } - loadInput(WeightedInput, nullptr, nullptr, /*ProfiledBinary=*/"", &Context); + loadInput(WeightedInput, nullptr, nullptr, /*ProfiledBinary=*/"", &Context, + /*KeepVTableSymbols=*/false); 
overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS, IsCS); Overlap.dump(OS); @@ -2412,7 +2817,7 @@ static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK, static int showInstrProfile( const std::string &Filename, bool ShowCounts, uint32_t TopN, - bool ShowIndirectCallTargets, bool ShowMemOPSizes, bool ShowDetailedSummary, + bool ShowIndirectCallTargets, bool ShowVTables, bool ShowMemOPSizes, bool ShowDetailedSummary, std::vector DetailedSummaryCutoffs, bool ShowAllFunctions, bool ShowCS, uint64_t ValueCutoff, bool OnlyListBelow, const std::string &ShowFunction, bool TextFormat, bool ShowBinaryIds, @@ -2548,6 +2953,10 @@ static int showInstrProfile( OS << " Indirect Call Site Count: " << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n"; + if (ShowVTables) + OS << " Number of instrumented vtables: " + << Func.getNumValueSites(IPVK_VTableTarget) << "\n"; + uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize); if (ShowMemOPSizes && NumMemOPCalls > 0) OS << " Number of Memory Intrinsics Calls: " << NumMemOPCalls @@ -2569,6 +2978,13 @@ static int showInstrProfile( &(Reader->getSymtab())); } + if (ShowVTables) { + OS << " VTable Results:\n"; + traverseAllValueSites(Func, IPVK_VTableTarget, + VPStats[IPVK_VTableTarget], OS, + &(Reader->getSymtab())); + } + if (ShowMemOPSizes && NumMemOPCalls > 0) { OS << " Memory Intrinsic Size Results:\n"; traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS, @@ -2617,6 +3033,11 @@ static int showInstrProfile( VPStats[IPVK_IndirectCallTarget]); } + if (ShownFunctions && ShowVTables) { + OS << "Statistics for vtable profile:\n"; + showValueSitesStats(OS, IPVK_VTableTarget, VPStats[IPVK_VTableTarget]); + } + if (ShownFunctions && ShowMemOPSizes) { OS << "Statistics for memory intrinsic calls sizes profile:\n"; showValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]); @@ -2881,15 +3302,17 @@ static int showDebugInfoCorrelation(const std::string &Filename, if (SFormat 
== ShowFormat::Json) exitWithError("JSON output is not supported for debug info correlation"); std::unique_ptr Correlator; - if (auto Err = InstrProfCorrelator::get(Filename).moveInto(Correlator)) + if (auto Err = + InstrProfCorrelator::get(Filename, InstrProfCorrelator::DEBUG_INFO) + .moveInto(Correlator)) exitWithError(std::move(Err), Filename); if (SFormat == ShowFormat::Yaml) { - if (auto Err = Correlator->dumpYaml(OS)) + if (auto Err = Correlator->dumpYaml(MaxDbgCorrelationWarnings, OS)) exitWithError(std::move(Err), Filename); return 0; } - if (auto Err = Correlator->correlateProfileData()) + if (auto Err = Correlator->correlateProfileData(MaxDbgCorrelationWarnings)) exitWithError(std::move(Err), Filename); InstrProfSymtab Symtab; @@ -2931,6 +3354,8 @@ static int show_main(int argc, const char *argv[]) { cl::opt ShowIndirectCallTargets( "ic-targets", cl::init(false), cl::desc("Show indirect call site target values for shown functions")); + cl::opt ShowVTables("show-vtables", cl::init(false), + cl::desc("Show vtable names for shown functions")); cl::opt ShowMemOPSizes( "memop-sizes", cl::init(false), cl::desc("Show the profiled sizes of the memory intrinsic calls " @@ -3026,7 +3451,7 @@ static int show_main(int argc, const char *argv[]) { if (ProfileKind == instr) return showInstrProfile( - Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets, + Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets, ShowVTables, ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs, ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction, TextFormat, ShowBinaryIds, ShowCovered, ShowProfileVersion, @@ -3069,17 +3494,11 @@ static int order_main(int argc, const char *argv[]) { WithColor::note() << "# Ordered " << Nodes.size() << " functions\n"; for (auto &N : Nodes) { - auto FuncName = Reader->getSymtab().getFuncName(N.Id); - if (FuncName.contains(':')) { - // GlobalValue::getGlobalIdentifier() prefixes the filename if the symbol - // is local. 
This logic will break if there is a colon in the filename, - // but we cannot use rsplit() because ObjC symbols can have colons. - auto [Filename, ParsedFuncName] = FuncName.split(':'); - // Emit a comment describing where this symbol came from + auto [Filename, ParsedFuncName] = + getParsedIRPGOFuncName(Reader->getSymtab().getFuncName(N.Id)); + if (!Filename.empty()) OS << "# " << Filename << "\n"; - FuncName = ParsedFuncName; - } - OS << FuncName << "\n"; + OS << ParsedFuncName << "\n"; } return 0; } diff --git a/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp b/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp index eff8e27d36d64131ac09edd1bd21b250cd02b2b8..9c4e95c9a635eb8d21131fd61a0f9410097d1d63 100644 --- a/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp +++ b/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp @@ -8,9 +8,12 @@ #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/AsmParser/Parser.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/IR/NoFolder.h" #include "llvm/Support/SourceMgr.h" #include "gtest/gtest.h" @@ -24,6 +27,21 @@ static std::unique_ptr parseIR(LLVMContext &C, const char *IR) { return Mod; } +// Returns a constant representing the vtable's address point specified by the +// offset. 
+static Constant *getVTableAddressPointOffset(GlobalVariable *VTable, + uint32_t AddressPointOffset) { + Module &M = *VTable->getParent(); + LLVMContext &Context = M.getContext(); + assert(AddressPointOffset < + M.getDataLayout().getTypeAllocSize(VTable->getValueType()) && + "Out-of-bound access"); + + return ConstantExpr::getInBoundsGetElementPtr( + Type::getInt8Ty(Context), VTable, + llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset)); +} + TEST(CallPromotionUtilsTest, TryPromoteCall) { LLVMContext C; std::unique_ptr M = parseIR(C, @@ -368,3 +386,73 @@ declare %struct2 @_ZN4Impl3RunEv(%class.Impl* %this) bool IsPromoted = tryPromoteCall(*CI); EXPECT_FALSE(IsPromoted); } + +TEST(CallPromotionUtilsTest, promoteCallWithVTableCmp) { + LLVMContext C; + std::unique_ptr M = parseIR(C, + R"IR( +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@_ZTV5Base1 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN5Base15func0Ev, ptr @_ZN5Base15func1Ev] }, !type !0 +@_ZTV8Derived1 = constant { [4 x ptr], [3 x ptr] } { [4 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr @_ZN5Base15func0Ev, ptr @_ZN5Base15func1Ev], [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base25func2Ev] }, !type !0, !type !1, !type !2 +@_ZTV8Derived2 = constant { [3 x ptr], [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base35func3Ev], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr @_ZN5Base25func2Ev], [4 x ptr] [ptr inttoptr (i64 -16 to ptr), ptr null, ptr @_ZN5Base15func0Ev, ptr @_ZN5Base15func1Ev] }, !type !3, !type !4, !type !5, !type !6 + +define i32 @testfunc(ptr %d) { +entry: + %vtable = load ptr, ptr %d, !prof !7 + %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1 + %0 = load ptr, ptr %vfn + %call = tail call i32 %0(ptr %d), !prof !8 + ret i32 %call +} + +define i32 @_ZN5Base15func1Ev(ptr %this) { +entry: + ret i32 2 +} + +declare i32 
@_ZN5Base25func2Ev(ptr) +declare i32 @_ZN5Base15func0Ev(ptr) +declare void @_ZN5Base35func3Ev(ptr) + +!0 = !{i64 16, !"_ZTS5Base1"} +!1 = !{i64 48, !"_ZTS5Base2"} +!2 = !{i64 16, !"_ZTS8Derived1"} +!3 = !{i64 64, !"_ZTS5Base1"} +!4 = !{i64 40, !"_ZTS5Base2"} +!5 = !{i64 16, !"_ZTS5Base3"} +!6 = !{i64 16, !"_ZTS8Derived2"} +!7 = !{!"VP", i32 2, i64 1600, i64 -9064381665493407289, i64 800, i64 5035968517245772950, i64 500, i64 3215870116411581797, i64 300} +!8 = !{!"VP", i32 0, i64 1600, i64 6804820478065511155, i64 1600})IR"); + + Function *F = M->getFunction("testfunc"); + CallInst *CI = dyn_cast(&*std::next(F->front().rbegin())); + ASSERT_TRUE(CI && CI->isIndirectCall()); + + // Create the constant and the branch weights + SmallVector VTableAddressPoints; + + for (auto &[VTableName, AddressPointOffset] : {std::pair{"_ZTV5Base1", 16}, + {"_ZTV8Derived1", 16}, + {"_ZTV8Derived2", 64}}) + VTableAddressPoints.push_back(getVTableAddressPointOffset( + M->getGlobalVariable(VTableName), AddressPointOffset)); + + MDBuilder MDB(C); + MDNode *BranchWeights = MDB.createBranchWeights(1600, 0); + + size_t OrigEntryBBSize = F->front().size(); + + LoadInst *VPtr = dyn_cast(&*F->front().begin()); + + Function *Callee = M->getFunction("_ZN5Base15func1Ev"); + // Tests that promoted direct call is returned. + CallBase &DirectCB = promoteCallWithVTableCmp( + *CI, VPtr, Callee, VTableAddressPoints, BranchWeights); + EXPECT_EQ(DirectCB.getCalledOperand(), Callee); + + // Promotion inserts 3 icmp instructions and 2 or instructions, and removes + // 1 call instruction from the entry block. + EXPECT_EQ(F->front().size(), OrigEntryBBSize + 4); +}