diff --git a/clang/include/clang/CodeGen/BackendUtil.h b/clang/include/clang/CodeGen/BackendUtil.h index d97af65a3d013da6d430e63c747be7a1f9526a84..cdbfe4ca5e654bdd1b3ea2b3ac16cbc8ff94ae1a 100644 --- a/clang/include/clang/CodeGen/BackendUtil.h +++ b/clang/include/clang/CodeGen/BackendUtil.h @@ -16,8 +16,12 @@ namespace llvm { class BitcodeModule; template class Expected; + template class IntrusiveRefCntPtr; class Module; class MemoryBufferRef; + namespace vfs { + class FileSystem; + } // namespace vfs } namespace clang { @@ -40,6 +44,7 @@ namespace clang { const CodeGenOptions &CGOpts, const TargetOptions &TOpts, const LangOptions &LOpts, StringRef TDesc, llvm::Module *M, BackendAction Action, + llvm::IntrusiveRefCntPtr VFS, std::unique_ptr OS); void EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 10d6bff25e6d6a7985b9b2e842ee717bb7489369..2b43d0e38bf7072fb87de7f8567462d4fea1fd23 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -52,6 +52,7 @@ #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/Timer.h" #include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -123,6 +124,7 @@ class EmitAssemblyHelper { const clang::TargetOptions &TargetOpts; const LangOptions &LangOpts; Module *TheModule; + IntrusiveRefCntPtr VFS; Timer CodeGenerationTime; @@ -187,9 +189,10 @@ public: const HeaderSearchOptions &HeaderSearchOpts, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, - const LangOptions &LOpts, Module *M) + const LangOptions &LOpts, Module *M, + IntrusiveRefCntPtr VFS) : Diags(_Diags), HSOpts(HeaderSearchOpts), CodeGenOpts(CGOpts), - TargetOpts(TOpts), LangOpts(LOpts), TheModule(M), + TargetOpts(TOpts), LangOpts(LOpts), TheModule(M), VFS(std::move(VFS)), 
CodeGenerationTime("codegen", "Code Generation Time"), TargetTriple(TheModule->getTargetTriple()) {} @@ -767,32 +770,33 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (CodeGenOpts.hasProfileIRInstr()) // -fprofile-generate. - PGOOpt = PGOOptions(CodeGenOpts.InstrProfileOutput.empty() - ? getDefaultProfileGenName() - : CodeGenOpts.InstrProfileOutput, - "", "", PGOOptions::IRInstr, PGOOptions::NoCSAction, - CodeGenOpts.DebugInfoForProfiling); + PGOOpt = PGOOptions( + CodeGenOpts.InstrProfileOutput.empty() ? getDefaultProfileGenName() + : CodeGenOpts.InstrProfileOutput, + "", "", nullptr, PGOOptions::IRInstr, PGOOptions::NoCSAction, + CodeGenOpts.DebugInfoForProfiling); else if (CodeGenOpts.hasProfileIRUse()) { // -fprofile-use. auto CSAction = CodeGenOpts.hasProfileCSIRUse() ? PGOOptions::CSIRUse : PGOOptions::NoCSAction; - PGOOpt = PGOOptions(CodeGenOpts.ProfileInstrumentUsePath, "", - CodeGenOpts.ProfileRemappingFile, PGOOptions::IRUse, - CSAction, CodeGenOpts.DebugInfoForProfiling); + PGOOpt = + PGOOptions(CodeGenOpts.ProfileInstrumentUsePath, "", + CodeGenOpts.ProfileRemappingFile, VFS, PGOOptions::IRUse, + CSAction, CodeGenOpts.DebugInfoForProfiling); } else if (!CodeGenOpts.SampleProfileFile.empty()) // -fprofile-sample-use PGOOpt = PGOOptions( CodeGenOpts.SampleProfileFile, "", CodeGenOpts.ProfileRemappingFile, - PGOOptions::SampleUse, PGOOptions::NoCSAction, + VFS, PGOOptions::SampleUse, PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling, CodeGenOpts.PseudoProbeForProfiling); else if (CodeGenOpts.PseudoProbeForProfiling) // -fpseudo-probe-for-profiling - PGOOpt = - PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction, - CodeGenOpts.DebugInfoForProfiling, true); + PGOOpt = PGOOptions("", "", "", nullptr, PGOOptions::NoAction, + PGOOptions::NoCSAction, + CodeGenOpts.DebugInfoForProfiling, true); else if (CodeGenOpts.DebugInfoForProfiling) // -fdebug-info-for-profiling - PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction, + 
PGOOpt = PGOOptions("", "", "", nullptr, PGOOptions::NoAction, PGOOptions::NoCSAction, true); // Check to see if we want to generate a CS profile. @@ -810,12 +814,13 @@ void EmitAssemblyHelper::RunOptimizationPipeline( : CodeGenOpts.InstrProfileOutput; PGOOpt->CSAction = PGOOptions::CSIRInstr; } else - PGOOpt = PGOOptions("", - CodeGenOpts.InstrProfileOutput.empty() - ? getDefaultProfileGenName() - : CodeGenOpts.InstrProfileOutput, - "", PGOOptions::NoAction, PGOOptions::CSIRInstr, - CodeGenOpts.DebugInfoForProfiling); + PGOOpt = + PGOOptions("", + CodeGenOpts.InstrProfileOutput.empty() + ? getDefaultProfileGenName() + : CodeGenOpts.InstrProfileOutput, + "", nullptr, PGOOptions::NoAction, PGOOptions::CSIRInstr, + CodeGenOpts.DebugInfoForProfiling); } if (TM) TM->setPGOOption(PGOOpt); @@ -1219,9 +1224,9 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, const HeaderSearchOptions &HeaderOpts, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, - const LangOptions &LOpts, - StringRef TDesc, Module *M, - BackendAction Action, + const LangOptions &LOpts, StringRef TDesc, + Module *M, BackendAction Action, + IntrusiveRefCntPtr VFS, std::unique_ptr OS) { llvm::TimeTraceScope TimeScope("Backend"); @@ -1264,7 +1269,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, } } - EmitAssemblyHelper AsmHelper(Diags, HeaderOpts, CGOpts, TOpts, LOpts, M); + EmitAssemblyHelper AsmHelper(Diags, HeaderOpts, CGOpts, TOpts, LOpts, M, VFS); AsmHelper.EmitAssembly(Action, std::move(OS)); // Verify clang's TargetInfo DataLayout against the LLVM TargetMachine's diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 2b219267869e4a8100c3dc33ee0a0c6836821fd8..1d6922176b354296daa4b12487006ec33f4c3168 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -115,6 +115,7 @@ namespace clang { const LangOptions &LangOpts; std::unique_ptr AsmOutStream; ASTContext *Context; + IntrusiveRefCntPtr FS; 
Timer LLVMIRGeneration; unsigned LLVMIRGenerationRefCount; @@ -147,7 +148,7 @@ namespace clang { public: BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, - IntrusiveRefCntPtr FS, + IntrusiveRefCntPtr VFS, const HeaderSearchOptions &HeaderSearchOpts, const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, @@ -158,10 +159,10 @@ namespace clang { CoverageSourceInfo *CoverageInfo = nullptr) : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), - AsmOutStream(std::move(OS)), Context(nullptr), + AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS), LLVMIRGeneration("irgen", "LLVM IR Generation Time"), LLVMIRGenerationRefCount(0), - Gen(CreateLLVMCodeGen(Diags, InFile, std::move(FS), HeaderSearchOpts, + Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts, PPOpts, CodeGenOpts, C, CoverageInfo)), LinkModules(std::move(LinkModules)) { TimerIsEnabled = CodeGenOpts.TimePasses; @@ -173,7 +174,7 @@ namespace clang { // to use the clang diagnostic handler for IR input files. It avoids // initializing the OS field. 
BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, - IntrusiveRefCntPtr FS, + IntrusiveRefCntPtr VFS, const HeaderSearchOptions &HeaderSearchOpts, const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, @@ -183,10 +184,10 @@ namespace clang { CoverageSourceInfo *CoverageInfo = nullptr) : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), - Context(nullptr), + Context(nullptr), FS(VFS), LLVMIRGeneration("irgen", "LLVM IR Generation Time"), LLVMIRGenerationRefCount(0), - Gen(CreateLLVMCodeGen(Diags, "", std::move(FS), HeaderSearchOpts, + Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts, PPOpts, CodeGenOpts, C, CoverageInfo)), LinkModules(std::move(LinkModules)), CurLinkModule(Module) { TimerIsEnabled = CodeGenOpts.TimePasses; @@ -381,7 +382,7 @@ namespace clang { EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts, C.getTargetInfo().getDataLayoutString(), - getModule(), Action, std::move(AsmOutStream)); + getModule(), Action, FS, std::move(AsmOutStream)); Ctx.setDiagnosticHandler(std::move(OldDiagnosticHandler)); @@ -1238,10 +1239,10 @@ void CodeGenAction::ExecuteAction() { std::unique_ptr OptRecordFile = std::move(*OptRecordFileOrErr); - EmitBackendOutput(Diagnostics, CI.getHeaderSearchOpts(), CodeGenOpts, - TargetOpts, CI.getLangOpts(), - CI.getTarget().getDataLayoutString(), TheModule.get(), BA, - std::move(OS)); + EmitBackendOutput( + Diagnostics, CI.getHeaderSearchOpts(), CodeGenOpts, TargetOpts, + CI.getLangOpts(), CI.getTarget().getDataLayoutString(), TheModule.get(), + BA, CI.getFileManager().getVirtualFileSystemPtr(), std::move(OS)); if (OptRecordFile) OptRecordFile->keep(); } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 12d602fed69320ec11135bf5fa71cc14ca712253..d5311806eacea0dad18794e1d92cb1cf9c199618 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ 
b/clang/lib/CodeGen/CodeGenModule.cpp @@ -107,11 +107,11 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO, llvm::Module &M, DiagnosticsEngine &diags, CoverageSourceInfo *CoverageInfo) - : Context(C), LangOpts(C.getLangOpts()), FS(std::move(FS)), - HeaderSearchOpts(HSO), PreprocessorOpts(PPO), CodeGenOpts(CGO), - TheModule(M), Diags(diags), Target(C.getTargetInfo()), - ABI(createCXXABI(*this)), VMContext(M.getContext()), Types(*this), - VTables(*this), SanitizerMD(new SanitizerMetadata(*this)) { + : Context(C), LangOpts(C.getLangOpts()), FS(FS), HeaderSearchOpts(HSO), + PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags), + Target(C.getTargetInfo()), ABI(createCXXABI(*this)), + VMContext(M.getContext()), Types(*this), VTables(*this), + SanitizerMD(new SanitizerMetadata(*this)) { // Initialize the type cache. llvm::LLVMContext &LLVMContext = M.getContext(); @@ -185,7 +185,8 @@ CodeGenModule::CodeGenModule(ASTContext &C, if (CodeGenOpts.hasProfileClangUse()) { auto ReaderOrErr = llvm::IndexedInstrProfReader::create( - CodeGenOpts.ProfileInstrumentUsePath, CodeGenOpts.ProfileRemappingFile); + CodeGenOpts.ProfileInstrumentUsePath, *FS, + CodeGenOpts.ProfileRemappingFile); // We're checking for profile read errors in CompilerInvocation, so if // there was an error it should've already been caught. If it hasn't been // somehow, trip an assertion. 
diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index 677b66d3e1dcb69ebc12c903e63ef577a2fa5710..2f2126eff527b0ac6038d74c600b460b9e6c5217 100644 --- a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -320,7 +320,7 @@ public: clang::EmitBackendOutput( Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts, Ctx.getTargetInfo().getDataLayoutString(), M.get(), - BackendAction::Backend_EmitLL, + BackendAction::Backend_EmitLL, FS, std::make_unique(Buffer)); llvm::dbgs() << Buffer; }); @@ -329,7 +329,7 @@ public: clang::EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts, Ctx.getTargetInfo().getDataLayoutString(), M.get(), - BackendAction::Backend_EmitObj, std::move(OS)); + BackendAction::Backend_EmitObj, FS, std::move(OS)); // Free the memory for the temporary buffer. llvm::SmallVector Empty; diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 0bb9c8c83c63bc68520f48e79a587041f8473b5b..ed483d22550b5238e7f6dfc365a9d6cc9b1519ff 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1304,8 +1304,9 @@ static std::string serializeXRayInstrumentationBundle(const XRayInstrSet &S) { // Set the profile kind using fprofile-instrument-use-path. 
static void setPGOUseInstrumentor(CodeGenOptions &Opts, const Twine &ProfileName, + llvm::vfs::FileSystem &FS, DiagnosticsEngine &Diags) { - auto ReaderOrErr = llvm::IndexedInstrProfReader::create(ProfileName); + auto ReaderOrErr = llvm::IndexedInstrProfReader::create(ProfileName, FS); if (auto E = ReaderOrErr.takeError()) { unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "Error in reading profile %0: %1"); @@ -1724,9 +1725,6 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, : codegenoptions::DebugTemplateNamesKind::Mangled); } - if (!Opts.ProfileInstrumentUsePath.empty()) - setPGOUseInstrumentor(Opts, Opts.ProfileInstrumentUsePath, Diags); - if (const Arg *A = Args.getLastArg(OPT_ftime_report, OPT_ftime_report_EQ)) { Opts.TimePasses = true; @@ -1962,8 +1960,8 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, Opts.OptimizationRemarkAnalysis.hasValidPattern(); bool UsingSampleProfile = !Opts.SampleProfileFile.empty(); - bool UsingProfile = UsingSampleProfile || - (Opts.getProfileUse() != CodeGenOptions::ProfileNone); + bool UsingProfile = + UsingSampleProfile || !Opts.ProfileInstrumentUsePath.empty(); if (Opts.DiagnosticsWithHotness && !UsingProfile && // An IR file will contain PGO as metadata @@ -4563,6 +4561,17 @@ bool CompilerInvocation::CreateFromArgsImpl( append_range(Res.getCodeGenOpts().CommandLineArgs, CommandLineArgs); } + // Set PGOOptions. Need to create a temporary VFS to read the profile + // to determine the PGO type. 
+ if (!Res.getCodeGenOpts().ProfileInstrumentUsePath.empty()) { + auto FS = + createVFSFromOverlayFiles(Res.getHeaderSearchOpts().VFSOverlayFiles, + Diags, llvm::vfs::getRealFileSystem()); + setPGOUseInstrumentor(Res.getCodeGenOpts(), + Res.getCodeGenOpts().ProfileInstrumentUsePath, *FS, + Diags); + } + FixupInvocation(Res, Diags, Args, DashX); return Diags.getNumErrors() == NumErrorsBefore; diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst index 89a624555fe441ccd376da42204df11b7c6fcc5a..1ed8531fe2217e6b5d11ec84151b664364afdd58 100644 --- a/llvm/docs/CommandGuide/llvm-profdata.rst +++ b/llvm/docs/CommandGuide/llvm-profdata.rst @@ -161,6 +161,12 @@ OPTIONS coverage for the optimized target. This option can only be used with sample-based profile in extbinary format. +.. option:: --convert-sample-profile-layout=[nest|flat] + + Convert the merged profile into a profile with a new layout. Supported + layouts are ``nest`` (nested profile; the input should be a CS flat profile) and + ``flat`` (profile with nested inlinees flattened out). + .. option:: --supplement-instr-with-sample= Supplement an instrumentation profile with sample profile.
The sample profile diff --git a/llvm/include/llvm/CodeGen/MIRSampleProfile.h b/llvm/include/llvm/CodeGen/MIRSampleProfile.h index f54c4b5891bec73a81c7049626761317c38dd182..221e966e2b9e0881f70e8b838ec36ec062a886be 100644 --- a/llvm/include/llvm/CodeGen/MIRSampleProfile.h +++ b/llvm/include/llvm/CodeGen/MIRSampleProfile.h @@ -14,6 +14,7 @@ #ifndef LLVM_CODEGEN_MIRSAMPLEPROFILE_H #define LLVM_CODEGEN_MIRSAMPLEPROFILE_H +#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/Discriminator.h" @@ -26,6 +27,10 @@ class MachineBlockFrequencyInfo; class MachineFunction; class Module; +namespace vfs { +class FileSystem; +} // namespace vfs + using namespace sampleprof; class MIRProfileLoader; @@ -41,7 +46,8 @@ public: /// FS bits will only use the '1' bits in the Mask. MIRProfileLoaderPass(std::string FileName = "", std::string RemappingFileName = "", - FSDiscriminatorPass P = FSDiscriminatorPass::Pass1); + FSDiscriminatorPass P = FSDiscriminatorPass::Pass1, + IntrusiveRefCntPtr FS = nullptr); /// getMachineFunction - Return the last machine function computed. const MachineFunction *getMachineFunction() const { return MF; } diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index b331c9a19fd12cfde5133648e3781cce5372496b..66d213c2877b7d0506a1a6345aad5cd4923496fa 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -31,6 +31,11 @@ class Pass; class TargetMachine; class raw_ostream; +template class IntrusiveRefCntPtr; +namespace vfs { +class FileSystem; +} // namespace vfs + } // End llvm namespace // List of target independent CodeGen pass IDs. @@ -551,9 +556,10 @@ namespace llvm { createMIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass P); /// Read Flow Sensitive Profile. 
- FunctionPass *createMIRProfileLoaderPass(std::string File, - std::string RemappingFile, - sampleprof::FSDiscriminatorPass P); + FunctionPass * + createMIRProfileLoaderPass(std::string File, std::string RemappingFile, + sampleprof::FSDiscriminatorPass P, + IntrusiveRefCntPtr FS); /// Creates MIR Debugify pass. \see MachineDebugify.cpp ModulePass *createDebugifyMachineModulePass(); diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index b7e6764d428bf1c4917d4bc8cd2f0c2537a69601..9d9f256f1ceda208ad8f1d1f3743805dc9de408c 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -32,6 +32,10 @@ class StringRef; class AAManager; class TargetMachine; class ModuleSummaryIndex; +template class IntrusiveRefCntPtr; +namespace vfs { +class FileSystem; +} // namespace vfs /// Tunable parameters for passes in the default pipelines. class PipelineTuningOptions { @@ -567,7 +571,8 @@ public: /// Add PGOInstrumenation passes for O0 only. void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, std::string ProfileFile, - std::string ProfileRemappingFile); + std::string ProfileRemappingFile, + IntrusiveRefCntPtr FS); /// Returns PIC. External libraries can use this to register pass /// instrumentation callbacks. 
@@ -607,7 +612,8 @@ private: void addPGOInstrPasses(ModulePassManager &MPM, OptimizationLevel Level, bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile, - ThinOrFullLTOPhase LTOPhase); + ThinOrFullLTOPhase LTOPhase, + IntrusiveRefCntPtr FS); void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel); // Extension Point callbacks diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index bdb77286246863b5c728b0b712b0dc6d3d009cd8..37ddae87d57427fbde7faf615306f222ef1947da 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -47,6 +47,10 @@ namespace object { class BuildIDFetcher; } // namespace object +namespace vfs { +class FileSystem; +} // namespace vfs + namespace coverage { class CoverageMappingReader; @@ -616,7 +620,8 @@ public: /// Ignores non-instrumented object files unless all are not instrumented. static Expected> load(ArrayRef ObjectFilenames, StringRef ProfileFilename, - ArrayRef Arches = std::nullopt, StringRef CompilationDir = "", + vfs::FileSystem &FS, ArrayRef Arches = std::nullopt, + StringRef CompilationDir = "", const object::BuildIDFetcher *BIDFetcher = nullptr); /// The number of functions that couldn't have their profiles mapped. diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 9c216e57d00594905ea006544c5140cf280bd0b5..c46fb8bf00b85ca3a0ff5aaf5486ca9f6be5db2b 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -41,6 +41,10 @@ namespace llvm { class InstrProfReader; +namespace vfs { +class FileSystem; +} // namespace vfs + /// A file format agnostic iterator over profiling data. template @@ -190,7 +194,8 @@ public: /// Factory method to create an appropriately typed reader for the given /// instrprof file. 
static Expected> - create(const Twine &Path, const InstrProfCorrelator *Correlator = nullptr); + create(const Twine &Path, vfs::FileSystem &FS, + const InstrProfCorrelator *Correlator = nullptr); static Expected> create(std::unique_ptr Buffer, @@ -693,7 +698,8 @@ public: /// Factory method to create an indexed reader. static Expected> - create(const Twine &Path, const Twine &RemappingPath = ""); + create(const Twine &Path, vfs::FileSystem &FS, + const Twine &RemappingPath = ""); static Expected> create(std::unique_ptr Buffer, diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 13f0157222eca55fc45201e820b6b0cd375c8122..4b57dbafada08c5bfff9a701fba4384b77e4f07d 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -96,6 +96,12 @@ enum SampleProfileFormat { SPF_Binary = 0xff }; +enum SampleProfileLayout { + SPL_None = 0, + SPL_Nest = 0x1, + SPL_Flat = 0x2, +}; + static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) { return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) | uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) | @@ -427,6 +433,14 @@ public: void print(raw_ostream &OS, unsigned Indent) const; void dump() const; + bool operator==(const SampleRecord &Other) const { + return NumSamples == Other.NumSamples && CallTargets == Other.CallTargets; + } + + bool operator!=(const SampleRecord &Other) const { + return !(*this == Other); + } + private: uint64_t NumSamples = 0; CallTargetMap CallTargets; @@ -709,6 +723,8 @@ using BodySampleMap = std::map; // memory, which is *very* significant for large profiles. using FunctionSamplesMap = std::map>; using CallsiteSampleMap = std::map; +using LocToLocMap = + std::unordered_map; /// Representation of the samples collected for a function. 
/// @@ -739,6 +755,8 @@ public: void setTotalSamples(uint64_t Num) { TotalSamples = Num; } + void setHeadSamples(uint64_t Num) { TotalHeadSamples = Num; } + sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { bool Overflowed; TotalHeadSamples = @@ -821,12 +839,26 @@ public: } } + // Query the stale profile matching results and remap the location. + const LineLocation &mapIRLocToProfileLoc(const LineLocation &IRLoc) const { + // There is no remapping if the profile is not stale or the matching gives + // the same location. + if (!IRToProfileLocationMap) + return IRLoc; + const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc); + if (ProfileLoc != IRToProfileLocationMap->end()) + return ProfileLoc->second; + else + return IRLoc; + } + /// Return the number of samples collected at the given location. /// Each location is specified by \p LineOffset and \p Discriminator. /// If the location is not found in profile, return error. ErrorOr findSamplesAt(uint32_t LineOffset, uint32_t Discriminator) const { - const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator)); + const auto &ret = BodySamples.find( + mapIRLocToProfileLoc(LineLocation(LineOffset, Discriminator))); if (ret == BodySamples.end()) return std::error_code(); return ret->second.getSamples(); @@ -837,7 +869,8 @@ public: /// If the location is not found in profile, return error. ErrorOr findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const { - const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator)); + const auto &ret = BodySamples.find( + mapIRLocToProfileLoc(LineLocation(LineOffset, Discriminator))); if (ret == BodySamples.end()) return std::error_code(); return ret->second.getCallTargets(); @@ -847,7 +880,7 @@ public: /// CallSite. If the location is not found in profile, return error. 
ErrorOr findCallTargetMapAt(const LineLocation &CallSite) const { - const auto &Ret = BodySamples.find(CallSite); + const auto &Ret = BodySamples.find(mapIRLocToProfileLoc(CallSite)); if (Ret == BodySamples.end()) return std::error_code(); return Ret->second.getCallTargets(); @@ -855,13 +888,13 @@ public: /// Return the function samples at the given callsite location. FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) { - return CallsiteSamples[Loc]; + return CallsiteSamples[mapIRLocToProfileLoc(Loc)]; } /// Returns the FunctionSamplesMap at the given \p Loc. const FunctionSamplesMap * findFunctionSamplesMapAt(const LineLocation &Loc) const { - auto iter = CallsiteSamples.find(Loc); + auto iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc)); if (iter == CallsiteSamples.end()) return nullptr; return &iter->second; @@ -926,6 +959,8 @@ public: return CallsiteSamples; } + CallsiteSampleMap &getCallsiteSamples() { return CallsiteSamples; } + /// Return the maximum of sample counts in a function body. When SkipCallSite /// is false, which is the default, the return count includes samples in the /// inlined functions. When SkipCallSite is true, the return count only @@ -1023,6 +1058,11 @@ public: uint64_t getFunctionHash() const { return FunctionHash; } + void setIRToProfileLocationMap(const LocToLocMap *LTLM) { + assert(IRToProfileLocationMap == nullptr && "this should be set only once"); + IRToProfileLocationMap = LTLM; + } + /// Return the canonical name for a function, taking into account /// suffix elision policy attributes. static StringRef getCanonicalFnName(const Function &F) { @@ -1149,6 +1189,21 @@ public: // all the inline instances and names of call targets. 
void findAllNames(DenseSet &NameSet) const; + bool operator==(const FunctionSamples &Other) const { + return (GUIDToFuncNameMap == Other.GUIDToFuncNameMap || + (GUIDToFuncNameMap && Other.GUIDToFuncNameMap && + *GUIDToFuncNameMap == *Other.GUIDToFuncNameMap)) && + FunctionHash == Other.FunctionHash && Context == Other.Context && + TotalSamples == Other.TotalSamples && + TotalHeadSamples == Other.TotalHeadSamples && + BodySamples == Other.BodySamples && + CallsiteSamples == Other.CallsiteSamples; + } + + bool operator!=(const FunctionSamples &Other) const { + return !(*this == Other); + } + private: /// CFG hash value for the function. uint64_t FunctionHash = 0; @@ -1191,6 +1246,25 @@ private: /// in the call to bar() at line offset 1, the other for all the samples /// collected in the call to baz() at line offset 8. CallsiteSampleMap CallsiteSamples; + + /// IR to profile location map generated by stale profile matching. + /// + /// Each entry is a mapping from the location on current build to the matched + /// location in the "stale" profile. For example: + /// Profiled source code: + /// void foo() { + /// 1 bar(); + /// } + /// + /// Current source code: + /// void foo() { + /// 1 // Code change + /// 2 bar(); + /// } + /// Supposing the stale profile matching algorithm generated the mapping [2 -> + /// 1], the profile query using the location of bar on the IR which is 2 will + /// be remapped to 1 and find the location of bar in the profile. + const LocToLocMap *IRToProfileLocationMap = nullptr; }; raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS); @@ -1251,12 +1325,16 @@ private: SampleProfileMap &ProfileMap; }; -// CSProfileConverter converts a full context-sensitive flat sample profile into -// a nested context-sensitive sample profile. -class CSProfileConverter { +/// Helper class for profile conversion. 
+/// +/// It supports full context-sensitive profile to nested profile conversion, +/// nested profile to flatten profile conversion, etc. +class ProfileConverter { public: - CSProfileConverter(SampleProfileMap &Profiles); - void convertProfiles(); + ProfileConverter(SampleProfileMap &Profiles); + // Convert a full context-sensitive flat sample profile into a nested sample + // profile. + void convertCSProfiles(); struct FrameNode { FrameNode(StringRef FName = StringRef(), FunctionSamples *FSamples = nullptr, @@ -1276,9 +1354,85 @@ public: StringRef CalleeName); }; + static void flattenProfile(SampleProfileMap &ProfileMap, + bool ProfileIsCS = false) { + SampleProfileMap TmpProfiles; + flattenProfile(ProfileMap, TmpProfiles, ProfileIsCS); + ProfileMap = std::move(TmpProfiles); + } + + static void flattenProfile(const SampleProfileMap &InputProfiles, + SampleProfileMap &OutputProfiles, + bool ProfileIsCS = false) { + if (ProfileIsCS) { + for (const auto &I : InputProfiles) + OutputProfiles[I.second.getName()].merge(I.second); + // Retain the profile name and clear the full context for each function + // profile. + for (auto &I : OutputProfiles) + I.second.setContext(SampleContext(I.first)); + } else { + for (const auto &I : InputProfiles) + flattenNestedProfile(OutputProfiles, I.second); + } + } + private: + static void flattenNestedProfile(SampleProfileMap &OutputProfiles, + const FunctionSamples &FS) { + // To retain the context, checksum, attributes of the original profile, make + // a copy of it if no profile is found. + SampleContext &Context = FS.getContext(); + auto Ret = OutputProfiles.emplace(Context, FS); + FunctionSamples &Profile = Ret.first->second; + if (Ret.second) { + // When it's the copy of the old profile, just clear all the inlinees' + // samples. + Profile.getCallsiteSamples().clear(); + // We recompute TotalSamples later, so here set to zero. 
+ Profile.setTotalSamples(0); + } else { + for (const auto &Line : FS.getBodySamples()) { + Profile.addBodySamples(Line.first.LineOffset, Line.first.Discriminator, + Line.second.getSamples()); + } + } + + assert(Profile.getCallsiteSamples().empty() && + "There should be no inlinees' profiles after flattening."); + + // TotalSamples might not be equal to the sum of all samples from + // BodySamples and CallsiteSamples. So here we use "TotalSamples = + // Original_TotalSamples - All_of_Callsite_TotalSamples + + // All_of_Callsite_HeadSamples" to compute the new TotalSamples. + uint64_t TotalSamples = FS.getTotalSamples(); + + for (const auto &I : FS.getCallsiteSamples()) { + for (const auto &Callee : I.second) { + const auto &CalleeProfile = Callee.second; + // Add body sample. + Profile.addBodySamples(I.first.LineOffset, I.first.Discriminator, + CalleeProfile.getHeadSamplesEstimate()); + // Add callsite sample. + Profile.addCalledTargetSamples( + I.first.LineOffset, I.first.Discriminator, CalleeProfile.getName(), + CalleeProfile.getHeadSamplesEstimate()); + // Update total samples. + TotalSamples = TotalSamples >= CalleeProfile.getTotalSamples() + ? TotalSamples - CalleeProfile.getTotalSamples() + : 0; + TotalSamples += CalleeProfile.getHeadSamplesEstimate(); + // Recursively convert callee profile. + flattenNestedProfile(OutputProfiles, CalleeProfile); + } + } + Profile.addTotalSamples(TotalSamples); + + Profile.setHeadSamples(Profile.getHeadSamplesEstimate()); + } + // Nest all children profiles into the profile of Node. 
- void convertProfiles(FrameNode &Node); + void convertCSProfiles(FrameNode &Node); FrameNode *getOrCreateContextPath(const SampleContext &Context); SampleProfileMap &ProfileMap; diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 57e8c8c74e4ecbf09646e455d3d878321c5de03d..703ca81e332d3b4334723f30f2998834a2ba1205 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -251,6 +251,10 @@ namespace llvm { class raw_ostream; class Twine; +namespace vfs { +class FileSystem; +} // namespace vfs + namespace sampleprof { class SampleProfileReader; @@ -270,8 +274,8 @@ public: /// Create a remapper from the given remapping file. The remapper will /// be used for profile read in by Reader. static ErrorOr> - create(const std::string Filename, SampleProfileReader &Reader, - LLVMContext &C); + create(const std::string Filename, vfs::FileSystem &FS, + SampleProfileReader &Reader, LLVMContext &C); /// Create a remapper from the given Buffer. The remapper will /// be used for profile read in by Reader. @@ -450,7 +454,7 @@ public: /// Create a remapper underlying if RemapFilename is not empty. /// Parameter P specifies the FSDiscriminatorPass. static ErrorOr> - create(const std::string Filename, LLVMContext &C, + create(const std::string Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P = FSDiscriminatorPass::Base, const std::string RemapFilename = ""); @@ -458,7 +462,7 @@ public: /// Create a remapper underlying if RemapFilename is not empty. /// Parameter P specifies the FSDiscriminatorPass. 
static ErrorOr> - create(std::unique_ptr &B, LLVMContext &C, + create(std::unique_ptr &B, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P = FSDiscriminatorPass::Base, const std::string RemapFilename = ""); diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h index b1ed0335e9c93a85450a06560e1b8ba867d6e4d4..4edb3b049e704a284c333b0f0e2918b826be75a6 100644 --- a/llvm/include/llvm/ProfileData/SampleProfWriter.h +++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -35,6 +35,56 @@ enum SectionLayout { NumOfLayout, }; +/// When writing a profile with size limit, user may want to use a different +/// strategy to reduce function count other than dropping functions with fewest +/// samples first. In this case a class implementing the same interfaces should +/// be provided to SampleProfileWriter::writeWithSizeLimit(). +class FunctionPruningStrategy { +protected: + SampleProfileMap &ProfileMap; + size_t OutputSizeLimit; + +public: + /// \p ProfileMap A reference to the original profile map. It will be modified + /// by Erase(). + /// \p OutputSizeLimit Size limit in bytes of the output profile. This is + /// necessary to estimate how many functions to remove. + FunctionPruningStrategy(SampleProfileMap &ProfileMap, size_t OutputSizeLimit) + : ProfileMap(ProfileMap), OutputSizeLimit(OutputSizeLimit) {} + + virtual ~FunctionPruningStrategy() = default; + + /// SampleProfileWriter::writeWithSizeLimit() calls this after every write + /// iteration if the output size still exceeds the limit. This function + /// should erase some functions from the profile map so that the writer tries + /// to write the profile again with fewer functions. At least 1 entry from the + /// profile map must be erased. + /// + /// \p CurrentOutputSize Number of bytes in the output if current profile map + /// is written. 
+ virtual void Erase(size_t CurrentOutputSize) = 0; +}; + +class DefaultFunctionPruningStrategy : public FunctionPruningStrategy { + std::vector SortedFunctions; + +public: + DefaultFunctionPruningStrategy(SampleProfileMap &ProfileMap, + size_t OutputSizeLimit); + + /// In this default implementation, functions with fewest samples are dropped + /// first. Since the exact size of the output cannot be easily calculated due + /// to compression, we use a heuristic to remove as many functions as + /// necessary but not too many, aiming to minimize the number of write + /// iterations. + /// Empirically, functions with larger total sample count contain linearly + /// more sample entries, meaning it takes linearly more space to write them. + /// The cumulative length is therefore quadratic if all functions are sorted + /// by total sample count. + /// TODO: Find better heuristic. + void Erase(size_t CurrentOutputSize) override; +}; + /// Sample-based profile writer. Base class. class SampleProfileWriter { public: @@ -50,6 +100,17 @@ public: /// \returns status code of the file update operation. virtual std::error_code write(const SampleProfileMap &ProfileMap); + /// Write sample profiles up to given size limit, using the pruning strategy + /// to drop some functions if necessary. + /// + /// \returns status code of the file update operation. + template + std::error_code writeWithSizeLimit(SampleProfileMap &ProfileMap, + size_t OutputSizeLimit) { + FunctionPruningStrategy Strategy(ProfileMap, OutputSizeLimit); + return writeWithSizeLimitInternal(ProfileMap, OutputSizeLimit, &Strategy); + } + raw_ostream &getOutputStream() { return *OutputStream; } /// Profile writer factory. @@ -79,6 +140,15 @@ protected: // Write function profiles to the profile file. 
virtual std::error_code writeFuncProfiles(const SampleProfileMap &ProfileMap); + std::error_code writeWithSizeLimitInternal(SampleProfileMap &ProfileMap, + size_t OutputSizeLimit, + FunctionPruningStrategy *Strategy); + + /// For writeWithSizeLimit in text mode, each newline takes 1 additional byte + /// on Windows when actually written to the file, but not written to a memory + /// buffer. This needs to be accounted for when rewriting the profile. + size_t LineCount; + /// Output stream where to emit the profile to. std::unique_ptr OutputStream; @@ -102,6 +172,7 @@ protected: : SampleProfileWriter(OS), Indent(0) {} std::error_code writeHeader(const SampleProfileMap &ProfileMap) override { + LineCount = 0; return sampleprof_error::success; } diff --git a/llvm/include/llvm/Support/PGOOptions.h b/llvm/include/llvm/Support/PGOOptions.h index 2141e2159c0c359f4c0345b86c917e9cadcbf81f..45a3b9a010f9370536c238334a16f885c6b3baa0 100644 --- a/llvm/include/llvm/Support/PGOOptions.h +++ b/llvm/include/llvm/Support/PGOOptions.h @@ -14,44 +14,29 @@ #ifndef LLVM_SUPPORT_PGOOPTIONS_H #define LLVM_SUPPORT_PGOOPTIONS_H +#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/Support/Error.h" namespace llvm { +namespace vfs { +class FileSystem; +} // namespace vfs + /// A struct capturing PGO tunables. 
struct PGOOptions { enum PGOAction { NoAction, IRInstr, IRUse, SampleUse }; enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse }; - PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "", - std::string ProfileRemappingFile = "", PGOAction Action = NoAction, - CSPGOAction CSAction = NoCSAction, + PGOOptions(std::string ProfileFile, std::string CSProfileGenFile, + std::string ProfileRemappingFile, + IntrusiveRefCntPtr FS, + PGOAction Action = NoAction, CSPGOAction CSAction = NoCSAction, bool DebugInfoForProfiling = false, - bool PseudoProbeForProfiling = false) - : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile), - ProfileRemappingFile(ProfileRemappingFile), Action(Action), - CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling || - (Action == SampleUse && - !PseudoProbeForProfiling)), - PseudoProbeForProfiling(PseudoProbeForProfiling) { - // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can - // callback with IRUse action without ProfileFile. - - // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse. - assert(this->CSAction == NoCSAction || - (this->Action != IRInstr && this->Action != SampleUse)); - - // For CSIRInstr, CSProfileGenFile also needs to be nonempty. - assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty()); - - // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share - // a profile. - assert(this->CSAction != CSIRUse || this->Action == IRUse); + bool PseudoProbeForProfiling = false); + PGOOptions(const PGOOptions &); + ~PGOOptions(); + PGOOptions &operator=(const PGOOptions &); - // If neither Action nor CSAction, DebugInfoForProfiling or - // PseudoProbeForProfiling needs to be true. 
- assert(this->Action != NoAction || this->CSAction != NoCSAction || - this->DebugInfoForProfiling || this->PseudoProbeForProfiling); - } std::string ProfileFile; std::string CSProfileGenFile; std::string ProfileRemappingFile; @@ -59,6 +44,7 @@ struct PGOOptions { CSPGOAction CSAction; bool DebugInfoForProfiling; bool PseudoProbeForProfiling; + IntrusiveRefCntPtr FS; }; } // namespace llvm diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h index 5e12fcfeae1b40aeccfeb1550f8ab1db4ac89381..bc8360a80bc02bdb963bf01fedc3b284f1c8ced1 100644 --- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h +++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h @@ -64,16 +64,22 @@ public: using iterator = ProfiledCallGraphNode::iterator; // Constructor for non-CS profile. - ProfiledCallGraph(SampleProfileMap &ProfileMap) { + ProfiledCallGraph(SampleProfileMap &ProfileMap, + uint64_t IgnoreColdCallThreshold = 0) { assert(!FunctionSamples::ProfileIsCS && "CS flat profile is not handled here"); for (const auto &Samples : ProfileMap) { addProfiledCalls(Samples.second); } + + // Trim edges with weight up to `IgnoreColdCallThreshold`. This aims + // for a more stable call graph with "deterministic" edges from run to run. + trimColdEges(IgnoreColdCallThreshold); } // Constructor for CS profile. - ProfiledCallGraph(SampleContextTracker &ContextTracker) { + ProfiledCallGraph(SampleContextTracker &ContextTracker, + uint64_t IgnoreColdCallThreshold = 0) { // BFS traverse the context profile trie to add call edges for calls shown // in context. std::queue Queue; @@ -121,11 +127,16 @@ public: ContextTracker.getFuncNameFor(Callee), Weight); } } + + // Trim edges with weight up to `IgnoreColdCallThreshold`. This aims + // for a more stable call graph with "deterministic" edges from run to run.
+ trimColdEges(IgnoreColdCallThreshold); } iterator begin() { return Root.Edges.begin(); } iterator end() { return Root.Edges.end(); } ProfiledCallGraphNode *getEntryNode() { return &Root; } + void addProfiledFunction(StringRef Name) { if (!ProfiledFunctions.count(Name)) { // Link to synthetic root to make sure every node is reachable @@ -148,8 +159,9 @@ private: auto EdgeIt = Edges.find(Edge); if (EdgeIt == Edges.end()) { Edges.insert(Edge); - } else if (EdgeIt->Weight < Edge.Weight) { - // Replace existing call edges with same target but smaller weight. + } else { + // Accumulate weight to the existing edge. + Edge.Weight += EdgeIt->Weight; Edges.erase(EdgeIt); Edges.insert(Edge); } @@ -175,6 +187,24 @@ private: } } + // Trim edges with weight up to `Threshold`. Do not trim anything if + // `Threshold` is zero. + void trimColdEges(uint64_t Threshold = 0) { + if (!Threshold) + return; + + for (auto &Node : ProfiledFunctions) { + auto &Edges = Node.second.Edges; + auto I = Edges.begin(); + while (I != Edges.end()) { + if (I->Weight <= Threshold) + I = Edges.erase(I); + else + I++; + } + } + } + ProfiledCallGraphNode Root; StringMap ProfiledFunctions; }; diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfile.h b/llvm/include/llvm/Transforms/IPO/SampleProfile.h index d838c8b8a83ecfcc2a938e2b9b1bf1151280c582..2ef55949e2365560cf0a48c238d24577655b7202 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfile.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfile.h @@ -14,6 +14,7 @@ #ifndef LLVM_TRANSFORMS_IPO_SAMPLEPROFILE_H #define LLVM_TRANSFORMS_IPO_SAMPLEPROFILE_H +#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include @@ -22,14 +23,17 @@ namespace llvm { class Module; +namespace vfs { +class FileSystem; +} // namespace vfs + /// The sample profiler data loader pass. 
class SampleProfileLoaderPass : public PassInfoMixin { public: SampleProfileLoaderPass( std::string File = "", std::string RemappingFile = "", - ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None) - : ProfileFileName(File), ProfileRemappingFileName(RemappingFile), - LTOPhase(LTOPhase) {} + ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None, + IntrusiveRefCntPtr FS = nullptr); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); @@ -37,6 +41,7 @@ private: std::string ProfileFileName; std::string ProfileRemappingFileName; const ThinOrFullLTOPhase LTOPhase; + IntrusiveRefCntPtr FS; }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h index 875a17094d2e5f15ba481e1c2a8d2191b9a2ebea..952bc2f8edbe34731a433dd4aa9ea92b95f08131 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h @@ -16,6 +16,7 @@ #define LLVM_TRANSFORMS_INSTRUMENTATION_PGOINSTRUMENTATION_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/IR/PassManager.h" #include #include @@ -26,6 +27,10 @@ class Function; class Instruction; class Module; +namespace vfs { +class FileSystem; +} // namespace vfs + /// The instrumentation (profile-instr-gen) pass for IR based PGO. // We use this pass to create COMDAT profile variables for context // sensitive PGO (CSPGO). 
The reason to have a pass for this is CSPGO @@ -58,7 +63,8 @@ private: class PGOInstrumentationUse : public PassInfoMixin { public: PGOInstrumentationUse(std::string Filename = "", - std::string RemappingFilename = "", bool IsCS = false); + std::string RemappingFilename = "", bool IsCS = false, + IntrusiveRefCntPtr FS = nullptr); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); @@ -67,6 +73,7 @@ private: std::string ProfileRemappingFileName; // If this is a context sensitive instrumentation. bool IsCS; + IntrusiveRefCntPtr FS; }; /// The indirect function call promotion pass. diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h index 19aef8050d44b281ea5f6764281c9390767dbc56..502867da3c50479ed5ffee62ebae6bc77af118c2 100644 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -18,6 +18,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -46,6 +47,10 @@ using namespace sampleprof; using namespace sampleprofutil; using ProfileCount = Function::ProfileCount; +namespace vfs { +class FileSystem; +} // namespace vfs + #define DEBUG_TYPE "sample-profile-impl" namespace afdo_detail { @@ -79,8 +84,9 @@ extern cl::opt SampleProfileUseProfi; template class SampleProfileLoaderBaseImpl { public: - SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName) - : Filename(Name), RemappingFilename(RemapName) {} + SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName, + IntrusiveRefCntPtr FS) + : Filename(Name), RemappingFilename(RemapName), FS(std::move(FS)) {} void dump() { Reader->dump(); } using InstructionT = typename afdo_detail::IRTraits::InstructionT; @@ -215,6 +221,9 @@ 
protected: /// Name of the profile remapping file to load. std::string RemappingFilename; + /// VirtualFileSystem to load profile files from. + IntrusiveRefCntPtr FS; + /// Profile Summary Info computed from sample profile. ProfileSummaryInfo *PSI = nullptr; diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp index a8996a586909c651de7b5842fd4388eb563355ae..81d86621b9df90e9dc47552cabb70c89e722b223 100644 --- a/llvm/lib/CodeGen/MIRSampleProfile.cpp +++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp @@ -26,6 +26,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" @@ -72,10 +73,11 @@ INITIALIZE_PASS_END(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile", char &llvm::MIRProfileLoaderPassID = MIRProfileLoaderPass::ID; -FunctionPass *llvm::createMIRProfileLoaderPass(std::string File, - std::string RemappingFile, - FSDiscriminatorPass P) { - return new MIRProfileLoaderPass(File, RemappingFile, P); +FunctionPass * +llvm::createMIRProfileLoaderPass(std::string File, std::string RemappingFile, + FSDiscriminatorPass P, + IntrusiveRefCntPtr FS) { + return new MIRProfileLoaderPass(File, RemappingFile, P, std::move(FS)); } namespace llvm { @@ -136,9 +138,10 @@ public: assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); } - MIRProfileLoader(StringRef Name, StringRef RemapName) - : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)) { - } + MIRProfileLoader(StringRef Name, StringRef RemapName, + IntrusiveRefCntPtr FS) + : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName), + std::move(FS)) {} void setBranchProbs(MachineFunction &F); bool runOnFunction(MachineFunction &F); @@ -254,8 +257,8 @@ void 
MIRProfileLoader::setBranchProbs(MachineFunction &F) { bool MIRProfileLoader::doInitialization(Module &M) { auto &Ctx = M.getContext(); - auto ReaderOrErr = sampleprof::SampleProfileReader::create(Filename, Ctx, P, - RemappingFilename); + auto ReaderOrErr = sampleprof::SampleProfileReader::create( + Filename, Ctx, *FS, P, RemappingFilename); if (std::error_code EC = ReaderOrErr.getError()) { std::string Msg = "Could not open profile: " + EC.message(); Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); @@ -291,14 +294,16 @@ bool MIRProfileLoader::runOnFunction(MachineFunction &MF) { } // namespace llvm -MIRProfileLoaderPass::MIRProfileLoaderPass(std::string FileName, - std::string RemappingFileName, - FSDiscriminatorPass P) - : MachineFunctionPass(ID), ProfileFileName(FileName), P(P), - MIRSampleLoader( - std::make_unique(FileName, RemappingFileName)) { +MIRProfileLoaderPass::MIRProfileLoaderPass( + std::string FileName, std::string RemappingFileName, FSDiscriminatorPass P, + IntrusiveRefCntPtr FS) + : MachineFunctionPass(ID), ProfileFileName(FileName), P(P) { LowBit = getFSPassBitBegin(P); HighBit = getFSPassBitEnd(P); + + auto VFS = FS ? 
std::move(FS) : vfs::getRealFileSystem(); + MIRSampleLoader = std::make_unique( + FileName, RemappingFileName, std::move(VFS)); assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); } diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 3127328c363e8dd614967d25b9f9e18ac8ba9430..a047e66dcc014365d0256e67c88c69aff6d9ae57 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/Threading.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Scalar.h" @@ -1149,9 +1150,9 @@ void TargetPassConfig::addMachinePasses() { sampleprof::FSDiscriminatorPass::Pass1)); const std::string ProfileFile = getFSProfileFile(TM); if (!ProfileFile.empty() && !DisableRAFSProfileLoader) - addPass( - createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM), - sampleprof::FSDiscriminatorPass::Pass1)); + addPass(createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM), + sampleprof::FSDiscriminatorPass::Pass1, + nullptr)); } // Run register allocation and passes that are tightly coupled with it, @@ -1525,9 +1526,9 @@ void TargetPassConfig::addBlockPlacement() { sampleprof::FSDiscriminatorPass::Pass2)); const std::string ProfileFile = getFSProfileFile(TM); if (!ProfileFile.empty() && !DisableLayoutFSProfileLoader) - addPass( - createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM), - sampleprof::FSDiscriminatorPass::Pass2)); + addPass(createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM), + sampleprof::FSDiscriminatorPass::Pass2, + nullptr)); } if (addPass(&MachineBlockPlacementID)) { // Run a separate pass to collect block placement statistics. 
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 1c2ca253af35399a074f66c80e896be1028a00e7..4c41a382276a4ac37204e302b0e8c3b8b61d70fa 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -38,6 +38,7 @@ #include "llvm/Support/Program.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO/WholeProgramDevirt.h" @@ -232,21 +233,22 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, unsigned OptLevel, bool IsThinLTO, ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary) { + auto FS = vfs::getRealFileSystem(); std::optional PGOOpt; if (!Conf.SampleProfile.empty()) - PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping, + PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping, FS, PGOOptions::SampleUse, PGOOptions::NoCSAction, true); else if (Conf.RunCSIRInstr) { - PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping, + PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping, FS, PGOOptions::IRUse, PGOOptions::CSIRInstr, Conf.AddFSDiscriminator); } else if (!Conf.CSIRProfile.empty()) { - PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping, + PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping, FS, PGOOptions::IRUse, PGOOptions::CSIRUse, Conf.AddFSDiscriminator); NoPGOWarnMismatch = !Conf.PGOWarnMismatch; } else if (Conf.AddFSDiscriminator) { - PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction, + PGOOpt = PGOOptions("", "", "", nullptr, PGOOptions::NoAction, PGOOptions::NoCSAction, true); } TM->setPGOOption(PGOOpt); diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 0d074951cffc5ca3e82b471567851307a9f28613..d36912dc8f607e11b87636981dd28297415b0997 100644 --- 
a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/PGOOptions.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/Coroutines/CoroCleanup.h" @@ -692,7 +693,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, OptimizationLevel Level, bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile, - ThinOrFullLTOPhase LTOPhase) { + ThinOrFullLTOPhase LTOPhase, + IntrusiveRefCntPtr FS) { assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!"); if (!IsCS && !DisablePreInliner) { InlineParams IP; @@ -730,7 +732,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, if (!RunProfileGen) { assert(!ProfileFile.empty() && "Profile use expecting a profile file!"); - MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); + MPM.addPass( + PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS)); // Cache ProfileSummaryAnalysis once to avoid the potential need to insert // RequireAnalysisPass for PSI before subsequent non-module passes. 
MPM.addPass(RequireAnalysisPass()); @@ -760,13 +763,14 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, MPM.addPass(InstrProfiling(Options, IsCS)); } -void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM, - bool RunProfileGen, bool IsCS, - std::string ProfileFile, - std::string ProfileRemappingFile) { +void PassBuilder::addPGOInstrPassesForO0( + ModulePassManager &MPM, bool RunProfileGen, bool IsCS, + std::string ProfileFile, std::string ProfileRemappingFile, + IntrusiveRefCntPtr FS) { if (!RunProfileGen) { assert(!ProfileFile.empty() && "Profile use expecting a profile file!"); - MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); + MPM.addPass( + PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS)); // Cache ProfileSummaryAnalysis once to avoid the potential need to insert // RequireAnalysisPass for PSI before subsequent non-module passes. MPM.addPass(RequireAnalysisPass()); @@ -1052,7 +1056,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, addPGOInstrPasses(MPM, Level, /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr, /* IsCS */ false, PGOOpt->ProfileFile, - PGOOpt->ProfileRemappingFile, Phase); + PGOOpt->ProfileRemappingFile, Phase, PGOOpt->FS); MPM.addPass(PGOIndirectCallPromotion(false, false)); } if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && @@ -1266,11 +1270,11 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, if (PGOOpt->CSAction == PGOOptions::CSIRInstr) addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ true, PGOOpt->CSProfileGenFile, - PGOOpt->ProfileRemappingFile, LTOPhase); + PGOOpt->ProfileRemappingFile, LTOPhase, PGOOpt->FS); else if (PGOOpt->CSAction == PGOOptions::CSIRUse) addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false, /* IsCS */ true, PGOOpt->ProfileFile, - PGOOpt->ProfileRemappingFile, LTOPhase); + PGOOpt->ProfileRemappingFile, LTOPhase, PGOOpt->FS); } // Re-compute GlobalsAA here 
prior to function passes. This is particularly @@ -1742,12 +1746,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ true, PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile, - ThinOrFullLTOPhase::FullLTOPostLink); + ThinOrFullLTOPhase::FullLTOPostLink, PGOOpt->FS); else if (PGOOpt->CSAction == PGOOptions::CSIRUse) addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false, /* IsCS */ true, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, - ThinOrFullLTOPhase::FullLTOPostLink); + ThinOrFullLTOPhase::FullLTOPostLink, PGOOpt->FS); } // Break up allocas @@ -1878,7 +1882,8 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, addPGOInstrPassesForO0( MPM, /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr), - /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile); + /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, + PGOOpt->FS); for (auto &C : PipelineStartEPCallbacks) C(MPM, Level); diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index ce71eebd4fd3809adca7bcf32e22f0ac25994dc0..360d30d5d0a14f0426021a1e3d849975aaa6cbd1 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -383,10 +384,10 @@ Error CoverageMapping::loadFromFile( Expected> CoverageMapping::load(ArrayRef ObjectFilenames, - StringRef ProfileFilename, ArrayRef Arches, - StringRef CompilationDir, + StringRef ProfileFilename, vfs::FileSystem &FS, + ArrayRef Arches, StringRef CompilationDir, const object::BuildIDFetcher *BIDFetcher) { - auto ProfileReaderOrErr = 
IndexedInstrProfReader::create(ProfileFilename); + auto ProfileReaderOrErr = IndexedInstrProfReader::create(ProfileFilename, FS); if (Error E = ProfileReaderOrErr.takeError()) return createFileError(ProfileFilename, std::move(E)); auto ProfileReader = std::move(ProfileReaderOrErr.get()); diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index aee104310a1dd7d2df2b74e327ca120e522431ff..342d405af9dbc7d811987f42e90a136f2b3c30e3 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/VirtualFileSystem.h" #include #include #include @@ -1224,7 +1225,10 @@ Error OverlapStats::accumulateCounts(const std::string &BaseFilename, bool IsCS) { auto getProfileSum = [IsCS](const std::string &Filename, CountSumOrPercent &Sum) -> Error { - auto ReaderOrErr = InstrProfReader::create(Filename); + // This function is only used from llvm-profdata that doesn't use any kind + // of VFS. Just create a default RealFileSystem to read profiles. 
+ auto FS = vfs::getRealFileSystem(); + auto ReaderOrErr = InstrProfReader::create(Filename, *FS); if (Error E = ReaderOrErr.takeError()) { return E; } diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index d0714c9b466552972ef337a05b64b4d80d203cfc..498ec691044056323003cbdb535d02031416b7db 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/SymbolRemappingReader.h" +#include "llvm/Support/VirtualFileSystem.h" #include #include #include @@ -63,9 +64,9 @@ static InstrProfKind getProfileKindFromVersion(uint64_t Version) { } static Expected> -setupMemoryBuffer(const Twine &Path) { - ErrorOr> BufferOrErr = - MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true); +setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { + auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN() + : FS.getBufferForFile(Filename); if (std::error_code EC = BufferOrErr.getError()) return errorCodeToError(EC); return std::move(BufferOrErr.get()); @@ -161,10 +162,10 @@ static Error printBinaryIdsInternal(raw_ostream &OS, } Expected> -InstrProfReader::create(const Twine &Path, +InstrProfReader::create(const Twine &Path, vfs::FileSystem &FS, const InstrProfCorrelator *Correlator) { // Set up the buffer to read. - auto BufferOrError = setupMemoryBuffer(Path); + auto BufferOrError = setupMemoryBuffer(Path, FS); if (Error E = BufferOrError.takeError()) return std::move(E); return InstrProfReader::create(std::move(BufferOrError.get()), Correlator); @@ -201,9 +202,10 @@ InstrProfReader::create(std::unique_ptr Buffer, } Expected> -IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) { +IndexedInstrProfReader::create(const Twine &Path, vfs::FileSystem &FS, + const Twine &RemappingPath) { // Set up the buffer to read. 
- auto BufferOrError = setupMemoryBuffer(Path); + auto BufferOrError = setupMemoryBuffer(Path, FS); if (Error E = BufferOrError.takeError()) return std::move(E); @@ -211,7 +213,7 @@ IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) { std::unique_ptr RemappingBuffer; std::string RemappingPathStr = RemappingPath.str(); if (!RemappingPathStr.empty()) { - auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr); + auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr, FS); if (Error E = RemappingBufferOrError.takeError()) return std::move(E); RemappingBuffer = std::move(RemappingBufferOrError.get()); diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp index b4d5550a1721823273389fca1bc48cd747fa17cd..780f517d3e79e2f8eabe330aeb77abdd60ee50d8 100644 --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -291,7 +291,7 @@ const FunctionSamples *FunctionSamples::findFunctionSamplesAt( std::string CalleeGUID; CalleeName = getRepInFormat(CalleeName, UseMD5, CalleeGUID); - auto iter = CallsiteSamples.find(Loc); + auto iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc)); if (iter == CallsiteSamples.end()) return nullptr; auto FS = iter->second.find(CalleeName); @@ -461,9 +461,9 @@ void ProfileSymbolList::dump(raw_ostream &OS) const { OS << Sym << "\n"; } -CSProfileConverter::FrameNode * -CSProfileConverter::FrameNode::getOrCreateChildFrame( - const LineLocation &CallSite, StringRef CalleeName) { +ProfileConverter::FrameNode * +ProfileConverter::FrameNode::getOrCreateChildFrame(const LineLocation &CallSite, + StringRef CalleeName) { uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite); auto It = AllChildFrames.find(Hash); if (It != AllChildFrames.end()) { @@ -476,7 +476,7 @@ CSProfileConverter::FrameNode::getOrCreateChildFrame( return &AllChildFrames[Hash]; } -CSProfileConverter::CSProfileConverter(SampleProfileMap &Profiles) 
+ProfileConverter::ProfileConverter(SampleProfileMap &Profiles) : ProfileMap(Profiles) { for (auto &FuncSample : Profiles) { FunctionSamples *FSamples = &FuncSample.second; @@ -486,8 +486,8 @@ CSProfileConverter::CSProfileConverter(SampleProfileMap &Profiles) } } -CSProfileConverter::FrameNode * -CSProfileConverter::getOrCreateContextPath(const SampleContext &Context) { +ProfileConverter::FrameNode * +ProfileConverter::getOrCreateContextPath(const SampleContext &Context) { auto Node = &RootFrame; LineLocation CallSiteLoc(0, 0); for (auto &Callsite : Context.getContextFrames()) { @@ -497,14 +497,14 @@ CSProfileConverter::getOrCreateContextPath(const SampleContext &Context) { return Node; } -void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) { +void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) { // Process each child profile. Add each child profile to callsite profile map // of the current node `Node` if `Node` comes with a profile. Otherwise // promote the child profile to a standalone profile. 
auto *NodeProfile = Node.FuncSamples; for (auto &It : Node.AllChildFrames) { auto &ChildNode = It.second; - convertProfiles(ChildNode); + convertCSProfiles(ChildNode); auto *ChildProfile = ChildNode.FuncSamples; if (!ChildProfile) continue; @@ -544,4 +544,4 @@ void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) { } } -void CSProfileConverter::convertProfiles() { convertProfiles(RootFrame); } +void ProfileConverter::convertCSProfiles() { convertCSProfiles(RootFrame); } diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index d3753d1e8a99bb8210f9189ec69ab8dcf09567df..7fa3d5c535c25bbd712d65ab057a210f7eb3a684 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -35,6 +35,7 @@ #include "llvm/Support/LineIterator.h" #include "llvm/Support/MD5.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -1831,8 +1832,9 @@ SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) { /// /// \returns an error code indicating the status of the buffer. static ErrorOr> -setupMemoryBuffer(const Twine &Filename) { - auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true); +setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { + auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN() + : FS.getBufferForFile(Filename); if (std::error_code EC = BufferOrErr.getError()) return EC; auto Buffer = std::move(BufferOrErr.get()); @@ -1853,12 +1855,12 @@ setupMemoryBuffer(const Twine &Filename) { /// \returns an error code indicating the status of the created reader. 
ErrorOr> SampleProfileReader::create(const std::string Filename, LLVMContext &C, - FSDiscriminatorPass P, + vfs::FileSystem &FS, FSDiscriminatorPass P, const std::string RemapFilename) { - auto BufferOrError = setupMemoryBuffer(Filename); + auto BufferOrError = setupMemoryBuffer(Filename, FS); if (std::error_code EC = BufferOrError.getError()) return EC; - return create(BufferOrError.get(), C, P, RemapFilename); + return create(BufferOrError.get(), C, FS, P, RemapFilename); } /// Create a sample profile remapper from the given input, to remap the @@ -1873,9 +1875,10 @@ SampleProfileReader::create(const std::string Filename, LLVMContext &C, /// \returns an error code indicating the status of the created reader. ErrorOr> SampleProfileReaderItaniumRemapper::create(const std::string Filename, + vfs::FileSystem &FS, SampleProfileReader &Reader, LLVMContext &C) { - auto BufferOrError = setupMemoryBuffer(Filename); + auto BufferOrError = setupMemoryBuffer(Filename, FS); if (std::error_code EC = BufferOrError.getError()) return EC; return create(BufferOrError.get(), Reader, C); @@ -1923,7 +1926,7 @@ SampleProfileReaderItaniumRemapper::create(std::unique_ptr &B, /// \returns an error code indicating the status of the created reader. 
ErrorOr> SampleProfileReader::create(std::unique_ptr &B, LLVMContext &C, - FSDiscriminatorPass P, + vfs::FileSystem &FS, FSDiscriminatorPass P, const std::string RemapFilename) { std::unique_ptr Reader; if (SampleProfileReaderRawBinary::hasFormat(*B)) @@ -1940,8 +1943,8 @@ SampleProfileReader::create(std::unique_ptr &B, LLVMContext &C, return sampleprof_error::unrecognized_format; if (!RemapFilename.empty()) { - auto ReaderOrErr = - SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C); + auto ReaderOrErr = SampleProfileReaderItaniumRemapper::create( + RemapFilename, FS, *Reader, C); if (std::error_code EC = ReaderOrErr.getError()) { std::string Msg = "Could not create remapper: " + EC.message(); C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg)); diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp index 093790afe2d6664ef39dd5eff42fc11894ee950e..bce858a99a8193b8fba66ec5d888c6fcd41d1dda 100644 --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -30,6 +30,7 @@ #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include @@ -37,9 +38,109 @@ #include #include +#define DEBUG_TYPE "llvm-profdata" + using namespace llvm; using namespace sampleprof; +namespace llvm { +namespace support { +namespace endian { +namespace { + +// Adapter class to llvm::support::endian::Writer for pwrite(). 
+struct SeekableWriter { + raw_pwrite_stream &OS; + endianness Endian; + SeekableWriter(raw_pwrite_stream &OS, endianness Endian) + : OS(OS), Endian(Endian) {} + + template + void pwrite(ValueType Val, size_t Offset) { + std::string StringBuf; + raw_string_ostream SStream(StringBuf); + Writer(SStream, Endian).write(Val); + OS.pwrite(StringBuf.data(), StringBuf.size(), Offset); + } +}; + +} // namespace +} // namespace endian +} // namespace support +} // namespace llvm + +DefaultFunctionPruningStrategy::DefaultFunctionPruningStrategy( + SampleProfileMap &ProfileMap, size_t OutputSizeLimit) + : FunctionPruningStrategy(ProfileMap, OutputSizeLimit) { + sortFuncProfiles(ProfileMap, SortedFunctions); +} + +void DefaultFunctionPruningStrategy::Erase(size_t CurrentOutputSize) { + double D = (double)OutputSizeLimit / CurrentOutputSize; + size_t NewSize = (size_t)round(ProfileMap.size() * D * D); + size_t NumToRemove = ProfileMap.size() - NewSize; + if (NumToRemove < 1) + NumToRemove = 1; + + assert(NumToRemove <= SortedFunctions.size()); + llvm::for_each( + llvm::make_range(SortedFunctions.begin() + SortedFunctions.size() - + NumToRemove, + SortedFunctions.end()), + [&](const NameFunctionSamples &E) { ProfileMap.erase(E.first); }); + SortedFunctions.resize(SortedFunctions.size() - NumToRemove); +} + +std::error_code SampleProfileWriter::writeWithSizeLimitInternal( + SampleProfileMap &ProfileMap, size_t OutputSizeLimit, + FunctionPruningStrategy *Strategy) { + if (OutputSizeLimit == 0) + return write(ProfileMap); + + size_t OriginalFunctionCount = ProfileMap.size(); + + std::unique_ptr OriginalOutputStream; + OutputStream.swap(OriginalOutputStream); + + size_t IterationCount = 0; + size_t TotalSize; + + SmallVector StringBuffer; + do { + StringBuffer.clear(); + OutputStream.reset(new raw_svector_ostream(StringBuffer)); + if (std::error_code EC = write(ProfileMap)) + return EC; + + TotalSize = StringBuffer.size(); + // On Windows every "\n" is actually written as "\r\n" to 
disk but not to + // memory buffer, this difference should be added when considering the total + // output size. +#ifdef _WIN32 + if (Format == SPF_Text) + TotalSize += LineCount; +#endif + if (TotalSize <= OutputSizeLimit) + break; + + Strategy->Erase(TotalSize); + IterationCount++; + } while (ProfileMap.size() != 0); + + if (ProfileMap.size() == 0) + return sampleprof_error::too_large; + + OutputStream.swap(OriginalOutputStream); + OutputStream->write(StringBuffer.data(), StringBuffer.size()); + LLVM_DEBUG(dbgs() << "Profile originally has " << OriginalFunctionCount + << " functions, reduced to " << ProfileMap.size() << " in " + << IterationCount << " iterations\n"); + // Silence warning on Release build. + (void)OriginalFunctionCount; + (void)IterationCount; + return sampleprof_error::success; +} + std::error_code SampleProfileWriter::writeFuncProfiles(const SampleProfileMap &ProfileMap) { std::vector V; @@ -116,6 +217,12 @@ std::error_code SampleProfileWriterExtBinaryBase::addNewSection( std::error_code SampleProfileWriterExtBinaryBase::write(const SampleProfileMap &ProfileMap) { + // When calling write on a different profile map, existing states should be + // cleared. 
+ NameTable.clear(); + CSNameTable.clear(); + SecHdrTable.clear(); + if (std::error_code EC = writeHeader(ProfileMap)) return EC; @@ -477,6 +584,7 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) { if (Indent == 0) OS << ":" << S.getHeadSamples(); OS << "\n"; + LineCount++; SampleSorter SortedSamples(S.getBodySamples()); for (const auto &I : SortedSamples.get()) { @@ -493,6 +601,7 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) { for (const auto &J : Sample.getSortedCallTargets()) OS << " " << J.first << ":" << J.second; OS << "\n"; + LineCount++; } SampleSorter SortedCallsiteSamples( @@ -515,11 +624,13 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) { if (FunctionSamples::ProfileIsProbeBased) { OS.indent(Indent + 1); OS << "!CFGChecksum: " << S.getFunctionHash() << "\n"; + LineCount++; } if (S.getContext().getAllAttributes()) { OS.indent(Indent + 1); OS << "!Attributes: " << S.getContext().getAllAttributes() << "\n"; + LineCount++; } return sampleprof_error::success; @@ -605,14 +716,10 @@ std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() { auto &OS = *OutputStream; // Fill the slot remembered by TableOffset with the offset of FuncOffsetTable. - auto &OFS = static_cast(OS); uint64_t FuncOffsetTableStart = OS.tell(); - if (OFS.seek(TableOffset) == (uint64_t)-1) - return sampleprof_error::ostream_seek_unsupported; - support::endian::Writer Writer(*OutputStream, support::little); - Writer.write(FuncOffsetTableStart); - if (OFS.seek(FuncOffsetTableStart) == (uint64_t)-1) - return sampleprof_error::ostream_seek_unsupported; + support::endian::SeekableWriter Writer(static_cast(OS), + support::little); + Writer.pwrite(FuncOffsetTableStart, TableOffset); // Write out the table size. 
encodeULEB128(FuncOffsetTable.size(), OS); @@ -623,6 +730,7 @@ std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() { return EC; encodeULEB128(Entry.second, OS); } + FuncOffsetTable.clear(); return sampleprof_error::success; } @@ -650,6 +758,10 @@ SampleProfileWriterBinary::writeMagicIdent(SampleProfileFormat Format) { std::error_code SampleProfileWriterBinary::writeHeader(const SampleProfileMap &ProfileMap) { + // When calling write on a different profile map, existing names should be + // cleared. + NameTable.clear(); + writeMagicIdent(Format); computeSummary(ProfileMap); @@ -690,14 +802,6 @@ void SampleProfileWriterExtBinaryBase::allocSecHdrTable() { } std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() { - auto &OFS = static_cast(*OutputStream); - uint64_t Saved = OutputStream->tell(); - - // Set OutputStream to the location saved in SecHdrTableOffset. - if (OFS.seek(SecHdrTableOffset) == (uint64_t)-1) - return sampleprof_error::ostream_seek_unsupported; - support::endian::Writer Writer(*OutputStream, support::little); - assert(SecHdrTable.size() == SectionHdrLayout.size() && "SecHdrTable entries doesn't match SectionHdrLayout"); SmallVector IndexMap(SecHdrTable.size(), -1); @@ -714,21 +818,23 @@ std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() { // needs to be computed after SecLBRProfile (the order in SecHdrTable), // but it needs to be read before SecLBRProfile (the order in // SectionHdrLayout). So we use IndexMap above to switch the order. 
+ support::endian::SeekableWriter Writer( + static_cast(*OutputStream), support::little); for (uint32_t LayoutIdx = 0; LayoutIdx < SectionHdrLayout.size(); LayoutIdx++) { assert(IndexMap[LayoutIdx] < SecHdrTable.size() && "Incorrect LayoutIdx in SecHdrTable"); auto Entry = SecHdrTable[IndexMap[LayoutIdx]]; - Writer.write(static_cast(Entry.Type)); - Writer.write(static_cast(Entry.Flags)); - Writer.write(static_cast(Entry.Offset)); - Writer.write(static_cast(Entry.Size)); + Writer.pwrite(static_cast(Entry.Type), + SecHdrTableOffset + 4 * LayoutIdx * sizeof(uint64_t)); + Writer.pwrite(static_cast(Entry.Flags), + SecHdrTableOffset + (4 * LayoutIdx + 1) * sizeof(uint64_t)); + Writer.pwrite(static_cast(Entry.Offset), + SecHdrTableOffset + (4 * LayoutIdx + 2) * sizeof(uint64_t)); + Writer.pwrite(static_cast(Entry.Size), + SecHdrTableOffset + (4 * LayoutIdx + 3) * sizeof(uint64_t)); } - // Reset OutputStream. - if (OFS.seek(Saved) == (uint64_t)-1) - return sampleprof_error::ostream_seek_unsupported; - return sampleprof_error::success; } diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 4cbc3b79f3bbbe1379d3867d949d714a9b3304fb..fdb2a5dcaa1a5423e420efa7ec492f82fd574cdd 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -195,6 +195,7 @@ add_llvm_component_library(LLVMSupport NativeFormatting.cpp OptimizedStructLayout.cpp Optional.cpp + PGOOptions.cpp Parallel.cpp PluginLoader.cpp PrettyStackTrace.cpp diff --git a/llvm/lib/Support/PGOOptions.cpp b/llvm/lib/Support/PGOOptions.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d11528ca6dbc26a9b58e1de2beb2a30054a00464 --- /dev/null +++ b/llvm/lib/Support/PGOOptions.cpp @@ -0,0 +1,52 @@ +//===------ PGOOptions.cpp -- PGO option tunables --------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/PGOOptions.h" +#include "llvm/Support/VirtualFileSystem.h" + +using namespace llvm; + +PGOOptions::PGOOptions(std::string ProfileFile, std::string CSProfileGenFile, + std::string ProfileRemappingFile, + IntrusiveRefCntPtr FS, PGOAction Action, + CSPGOAction CSAction, bool DebugInfoForProfiling, + bool PseudoProbeForProfiling) + : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile), + ProfileRemappingFile(ProfileRemappingFile), Action(Action), + CSAction(CSAction), + DebugInfoForProfiling(DebugInfoForProfiling || + (Action == SampleUse && !PseudoProbeForProfiling)), + PseudoProbeForProfiling(PseudoProbeForProfiling), FS(std::move(FS)) { + // Note, we do allow ProfileFile.empty() for Action=IRUse because LTO can + // call back with IRUse action without ProfileFile. + + // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse. + assert(this->CSAction == NoCSAction || + (this->Action != IRInstr && this->Action != SampleUse)); + + // For CSIRInstr, CSProfileGenFile also needs to be nonempty. + assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty()); + + // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share + // a profile. + assert(this->CSAction != CSIRUse || this->Action == IRUse); + + // If neither Action nor CSAction, DebugInfoForProfiling or + // PseudoProbeForProfiling needs to be true. + assert(this->Action != NoAction || this->CSAction != NoCSAction || + this->DebugInfoForProfiling || this->PseudoProbeForProfiling); + + // If we need to use the profile, the VFS cannot be nullptr. 
+ assert(this->FS || !(this->Action == IRUse || this->CSAction == CSIRUse)); +} + +PGOOptions::PGOOptions(const PGOOptions &) = default; + +PGOOptions &PGOOptions::operator=(const PGOOptions &O) = default; + +PGOOptions::~PGOOptions() = default; diff --git a/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/llvm/lib/Target/X86/X86InsertPrefetch.cpp index 08dc514a6476f0de8968e0be1eddb72885b36d85..29ae05bf0c9467300fb451d9a907ce8709f3dab7 100644 --- a/llvm/lib/Target/X86/X86InsertPrefetch.cpp +++ b/llvm/lib/Target/X86/X86InsertPrefetch.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Transforms/IPO/SampleProfile.h" using namespace llvm; using namespace sampleprof; @@ -159,8 +160,10 @@ bool X86InsertPrefetch::doInitialization(Module &M) { return false; LLVMContext &Ctx = M.getContext(); + // TODO: Propagate virtual file system into LLVM targets. 
+ auto FS = vfs::getRealFileSystem(); ErrorOr> ReaderOrErr = - SampleProfileReader::create(Filename, Ctx); + SampleProfileReader::create(Filename, Ctx, *FS); if (std::error_code EC = ReaderOrErr.getError()) { std::string Msg = "Could not open profile: " + EC.message(); Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg, diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 93b368fd72a6279a161222db293f213900c1fd46..7a08536f9be5c324955c21d2b998627163ba076d 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -67,6 +67,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/ProfiledCallGraph.h" @@ -129,6 +130,11 @@ static cl::opt SampleProfileRemappingFile( "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden); +static cl::opt SalvageStaleProfile( + "salvage-stale-profile", cl::Hidden, cl::init(false), + cl::desc("Salvage stale profile by fuzzy matching and use the remapped " + "location for sample profile query.")); + static cl::opt ReportProfileStaleness( "report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics.")); @@ -138,6 +144,11 @@ static cl::opt PersistProfileStaleness( cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section).")); +static cl::opt FlattenProfileForMatching( + "flatten-profile-for-matching", cl::Hidden, cl::init(true), + cl::desc( + "Use flattened profile for stale profile detection and matching.")); + static cl::opt ProfileSampleAccurate( "profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is 
accurate, we will mark all un-sampled " @@ -428,6 +439,11 @@ class SampleProfileMatcher { Module &M; SampleProfileReader &Reader; const PseudoProbeManager *ProbeManager; + SampleProfileMap FlattenedProfiles; + // For each function, the matcher generates a map, of which each entry is a + // mapping from the source location of current build to the source location in + // the profile. + StringMap FuncMappings; // Profile mismatching statstics. uint64_t TotalProfiledCallsites = 0; @@ -442,9 +458,43 @@ class SampleProfileMatcher { public: SampleProfileMatcher(Module &M, SampleProfileReader &Reader, const PseudoProbeManager *ProbeManager) - : M(M), Reader(Reader), ProbeManager(ProbeManager) {} - void detectProfileMismatch(); - void detectProfileMismatch(const Function &F, const FunctionSamples &FS); + : M(M), Reader(Reader), ProbeManager(ProbeManager) { + if (FlattenProfileForMatching) { + ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, + FunctionSamples::ProfileIsCS); + } + } + void runOnModule(); + +private: + FunctionSamples *getFlattenedSamplesFor(const Function &F) { + StringRef CanonFName = FunctionSamples::getCanonicalFnName(F); + auto It = FlattenedProfiles.find(CanonFName); + if (It != FlattenedProfiles.end()) + return &It->second; + return nullptr; + } + void runOnFunction(const Function &F, const FunctionSamples &FS); + void countProfileMismatches( + const FunctionSamples &FS, + const std::unordered_set + &MatchedCallsiteLocs, + uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites); + + LocToLocMap &getIRToProfileLocationMap(const Function &F) { + auto Ret = FuncMappings.try_emplace( + FunctionSamples::getCanonicalFnName(F.getName()), LocToLocMap()); + return Ret.first->second; + } + void distributeIRToProfileLocationMap(); + void distributeIRToProfileLocationMap(FunctionSamples &FS); + void populateProfileCallsites( + const FunctionSamples &FS, + StringMap> &CalleeToCallsitesMap); + void runStaleProfileMatching( + const 
std::map &IRLocations, + StringMap> &CalleeToCallsitesMap, + LocToLocMap &IRToProfileLocationMap); }; /// Sample profile pass. @@ -457,10 +507,12 @@ class SampleProfileLoader final public: SampleProfileLoader( StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase, + IntrusiveRefCntPtr FS, std::function GetAssumptionCache, std::function GetTargetTransformInfo, std::function GetTLI) - : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)), + : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName), + std::move(FS)), GetAC(std::move(GetAssumptionCache)), GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)), LTOPhase(LTOPhase), @@ -1784,9 +1836,10 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { if (!ProbeManager->profileIsValid(F, *Samples)) { LLVM_DEBUG( dbgs() << "Profile is invalid due to CFG mismatch for Function " - << F.getName()); + << F.getName() << "\n"); ++NumMismatchedProfile; - return false; + if (!SalvageStaleProfile) + return false; } ++NumMatchedProfile; } else { @@ -1954,7 +2007,7 @@ bool SampleProfileLoader::doInitialization(Module &M, auto &Ctx = M.getContext(); auto ReaderOrErr = SampleProfileReader::create( - Filename, Ctx, FSDiscriminatorPass::Base, RemappingFilename); + Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename); if (std::error_code EC = ReaderOrErr.getError()) { std::string Msg = "Could not open profile: " + EC.message(); Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); @@ -2016,6 +2069,16 @@ bool SampleProfileLoader::doInitialization(Module &M, UsePreInlinerDecision = true; } + // Enable stale profile matching by default for probe-based profile. + // Currently the matching relies on if the checksum mismatch is detected, + // which is currently only available for pseudo-probe mode. Removing the + // checksum check could cause regressions for some cases, so further tuning + // might be needed if we want to enable it for all cases. 
+ if (Reader->profileIsProbeBased() && + !SalvageStaleProfile.getNumOccurrences()) { + SalvageStaleProfile = true; + } + if (!Reader->profileIsCS()) { // Non-CS profile should be fine without a function size budget for the // inliner since the contexts in the profile are either all from inlining @@ -2046,7 +2109,8 @@ bool SampleProfileLoader::doInitialization(Module &M, } } - if (ReportProfileStaleness || PersistProfileStaleness) { + if (ReportProfileStaleness || PersistProfileStaleness || + SalvageStaleProfile) { MatchingManager = std::make_unique(M, *Reader, ProbeManager.get()); } @@ -2054,8 +2118,167 @@ bool SampleProfileLoader::doInitialization(Module &M, return true; } -void SampleProfileMatcher::detectProfileMismatch(const Function &F, - const FunctionSamples &FS) { +void SampleProfileMatcher::countProfileMismatches( + const FunctionSamples &FS, + const std::unordered_set + &MatchedCallsiteLocs, + uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites) { + + auto isInvalidLineOffset = [](uint32_t LineOffset) { + return LineOffset & 0x8000; + }; + + // Check if there are any callsites in the profile that does not match to any + // IR callsites, those callsite samples will be discarded. 
+ for (auto &I : FS.getBodySamples()) { + const LineLocation &Loc = I.first; + if (isInvalidLineOffset(Loc.LineOffset)) + continue; + + uint64_t Count = I.second.getSamples(); + if (!I.second.getCallTargets().empty()) { + TotalCallsiteSamples += Count; + FuncProfiledCallsites++; + if (!MatchedCallsiteLocs.count(Loc)) { + MismatchedCallsiteSamples += Count; + FuncMismatchedCallsites++; + } + } + } + + for (auto &I : FS.getCallsiteSamples()) { + const LineLocation &Loc = I.first; + if (isInvalidLineOffset(Loc.LineOffset)) + continue; + + uint64_t Count = 0; + for (auto &FM : I.second) { + Count += FM.second.getHeadSamplesEstimate(); + } + TotalCallsiteSamples += Count; + FuncProfiledCallsites++; + if (!MatchedCallsiteLocs.count(Loc)) { + MismatchedCallsiteSamples += Count; + FuncMismatchedCallsites++; + } + } +} + +// Populate the anchors(direct callee name) from profile. +void SampleProfileMatcher::populateProfileCallsites( + const FunctionSamples &FS, + StringMap> &CalleeToCallsitesMap) { + for (const auto &I : FS.getBodySamples()) { + const auto &Loc = I.first; + const auto &CTM = I.second.getCallTargets(); + // Filter out possible indirect calls, use direct callee name as anchor. + if (CTM.size() == 1) { + StringRef CalleeName = CTM.begin()->first(); + const auto &Candidates = CalleeToCallsitesMap.try_emplace( + CalleeName, std::set()); + Candidates.first->second.insert(Loc); + } + } + + for (const auto &I : FS.getCallsiteSamples()) { + const LineLocation &Loc = I.first; + const auto &CalleeMap = I.second; + // Filter out possible indirect calls, use direct callee name as anchor. + if (CalleeMap.size() == 1) { + StringRef CalleeName = CalleeMap.begin()->first; + const auto &Candidates = CalleeToCallsitesMap.try_emplace( + CalleeName, std::set()); + Candidates.first->second.insert(Loc); + } + } +} + +// Call target name anchor based profile fuzzy matching. 
+// Input: +// For IR locations, the anchor is the callee name of direct callsite; For +// profile locations, it's the call target name for BodySamples or inlinee's +// profile name for CallsiteSamples. +// Matching heuristic: +// First match all the anchors in lexical order, then split the non-anchor +// locations between the two anchors evenly, first half are matched based on the +// start anchor, second half are matched based on the end anchor. +// For example, given: +// IR locations: [1, 2(foo), 3, 5, 6(bar), 7] +// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9] +// The matching gives: +// [1, 2(foo), 3, 5, 6(bar), 7] +// | | | | | | +// [1, 2, 3(foo), 4, 7, 8(bar), 9] +// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9]. +void SampleProfileMatcher::runStaleProfileMatching( + const std::map &IRLocations, + StringMap> &CalleeToCallsitesMap, + LocToLocMap &IRToProfileLocationMap) { + assert(IRToProfileLocationMap.empty() && + "Run stale profile matching only once per function"); + + auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) { + // Skip the unchanged location mapping to save memory. + if (From != To) + IRToProfileLocationMap.insert({From, To}); + }; + + // Use function's beginning location as the initial anchor. + int32_t LocationDelta = 0; + SmallVector LastMatchedNonAnchors; + + for (const auto &IR : IRLocations) { + const auto &Loc = IR.first; + StringRef CalleeName = IR.second; + bool IsMatchedAnchor = false; + // Match the anchor location in lexical order. 
+ if (!CalleeName.empty()) { + auto ProfileAnchors = CalleeToCallsitesMap.find(CalleeName); + if (ProfileAnchors != CalleeToCallsitesMap.end() && + !ProfileAnchors->second.empty()) { + auto CI = ProfileAnchors->second.begin(); + const auto &Candidate = *CI; + ProfileAnchors->second.erase(CI); + InsertMatching(Loc, Candidate); + LLVM_DEBUG(dbgs() << "Callsite with callee:" << CalleeName + << " is matched from " << Loc << " to " << Candidate + << "\n"); + LocationDelta = Candidate.LineOffset - Loc.LineOffset; + + // Match backwards for non-anchor locations. + // The locations in LastMatchedNonAnchors have been matched forwards + // based on the previous anchor, split it evenly and overwrite the + // second half based on the current anchor. + for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2; + I < LastMatchedNonAnchors.size(); I++) { + const auto &L = LastMatchedNonAnchors[I]; + uint32_t CandidateLineOffset = L.LineOffset + LocationDelta; + LineLocation Candidate(CandidateLineOffset, L.Discriminator); + InsertMatching(L, Candidate); + LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L + << " to " << Candidate << "\n"); + } + + IsMatchedAnchor = true; + LastMatchedNonAnchors.clear(); + } + } + + // Match forwards for non-anchor locations. 
+ if (!IsMatchedAnchor) { + uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta; + LineLocation Candidate(CandidateLineOffset, Loc.Discriminator); + InsertMatching(Loc, Candidate); + LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to " + << Candidate << "\n"); + LastMatchedNonAnchors.emplace_back(Loc); + } + } +} + +void SampleProfileMatcher::runOnFunction(const Function &F, + const FunctionSamples &FS) { + bool IsFuncHashMismatch = false; if (FunctionSamples::ProfileIsProbeBased) { uint64_t Count = FS.getTotalSamples(); TotalFuncHashSamples += Count; @@ -2063,16 +2286,24 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F, if (!ProbeManager->profileIsValid(F, FS)) { MismatchedFuncHashSamples += Count; NumMismatchedFuncHash++; - return; + IsFuncHashMismatch = true; } } std::unordered_set MatchedCallsiteLocs; + // The value of the map is the name of direct callsite and use empty StringRef + // for non-direct-call site. + std::map IRLocations; - // Go through all the callsites on the IR and flag the callsite if the target - // name is the same as the one in the profile. + // Extract profile matching anchors and profile mismatch metrics in the IR. for (auto &BB : F) { for (auto &I : BB) { + // TODO: Support line-number based location(AutoFDO). + if (FunctionSamples::ProfileIsProbeBased && isa(&I)) { + if (std::optional Probe = extractProbe(I)) + IRLocations.emplace(LineLocation(Probe->Id, 0), StringRef()); + } + if (!isa(&I) || isa(&I)) continue; @@ -2084,6 +2315,17 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F, if (Function *Callee = CB->getCalledFunction()) CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName()); + // Force to overwrite the callee name in case any non-call location was + // written before. 
+ auto R = IRLocations.emplace(IRCallsite, CalleeName); + R.first->second = CalleeName; + assert((!FunctionSamples::ProfileIsProbeBased || R.second || + R.first->second == CalleeName) && + "Overwrite non-call or different callee name location for " + "pseudo probe callsite"); + + // Go through all the callsites on the IR and flag the callsite if the + // target name is the same as the one in the profile. const auto CTM = FS.findCallTargetMapAt(IRCallsite); const auto CallsiteFS = FS.findFunctionSamplesMapAt(IRCallsite); @@ -2105,55 +2347,54 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F, } } - auto isInvalidLineOffset = [](uint32_t LineOffset) { - return LineOffset & 0x8000; - }; + // Detect profile mismatch for profile staleness metrics report. + if (ReportProfileStaleness || PersistProfileStaleness) { + uint64_t FuncMismatchedCallsites = 0; + uint64_t FuncProfiledCallsites = 0; + countProfileMismatches(FS, MatchedCallsiteLocs, FuncMismatchedCallsites, + FuncProfiledCallsites); + TotalProfiledCallsites += FuncProfiledCallsites; + NumMismatchedCallsites += FuncMismatchedCallsites; + LLVM_DEBUG({ + if (FunctionSamples::ProfileIsProbeBased && !IsFuncHashMismatch && + FuncMismatchedCallsites) + dbgs() << "Function checksum is matched but there are " + << FuncMismatchedCallsites << "/" << FuncProfiledCallsites + << " mismatched callsites.\n"; + }); + } - // Check if there are any callsites in the profile that does not match to any - // IR callsites, those callsite samples will be discarded. 
- for (auto &I : FS.getBodySamples()) { - const LineLocation &Loc = I.first; - if (isInvalidLineOffset(Loc.LineOffset)) - continue; + if (IsFuncHashMismatch && SalvageStaleProfile) { + LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName() + << "\n"); - uint64_t Count = I.second.getSamples(); - if (!I.second.getCallTargets().empty()) { - TotalCallsiteSamples += Count; - TotalProfiledCallsites++; - if (!MatchedCallsiteLocs.count(Loc)) { - MismatchedCallsiteSamples += Count; - NumMismatchedCallsites++; - } - } - } + StringMap> CalleeToCallsitesMap; + populateProfileCallsites(FS, CalleeToCallsitesMap); - for (auto &I : FS.getCallsiteSamples()) { - const LineLocation &Loc = I.first; - if (isInvalidLineOffset(Loc.LineOffset)) - continue; + // The matching result will be saved to IRToProfileLocationMap, create a new + // map for each function. + auto &IRToProfileLocationMap = getIRToProfileLocationMap(F); - uint64_t Count = 0; - for (auto &FM : I.second) { - Count += FM.second.getHeadSamplesEstimate(); - } - TotalCallsiteSamples += Count; - TotalProfiledCallsites++; - if (!MatchedCallsiteLocs.count(Loc)) { - MismatchedCallsiteSamples += Count; - NumMismatchedCallsites++; - } + runStaleProfileMatching(IRLocations, CalleeToCallsitesMap, + IRToProfileLocationMap); } } -void SampleProfileMatcher::detectProfileMismatch() { +void SampleProfileMatcher::runOnModule() { for (auto &F : M) { if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile")) continue; - FunctionSamples *FS = Reader.getSamplesFor(F); + FunctionSamples *FS = nullptr; + if (FlattenProfileForMatching) + FS = getFlattenedSamplesFor(F); + else + FS = Reader.getSamplesFor(F); if (!FS) continue; - detectProfileMismatch(F, *FS); + runOnFunction(F, *FS); } + if (SalvageStaleProfile) + distributeIRToProfileLocationMap(); if (ReportProfileStaleness) { if (FunctionSamples::ProfileIsProbeBased) { @@ -2196,6 +2437,28 @@ void SampleProfileMatcher::detectProfileMismatch() { } } +void 
SampleProfileMatcher::distributeIRToProfileLocationMap( + FunctionSamples &FS) { + const auto ProfileMappings = FuncMappings.find(FS.getName()); + if (ProfileMappings != FuncMappings.end()) { + FS.setIRToProfileLocationMap(&(ProfileMappings->second)); + } + + for (auto &Inlinees : FS.getCallsiteSamples()) { + for (auto FS : Inlinees.second) { + distributeIRToProfileLocationMap(FS.second); + } + } +} + +// Use a central place to distribute the matching results. Outlined and inlined +// profile with the function name will be set to the same pointer. +void SampleProfileMatcher::distributeIRToProfileLocationMap() { + for (auto &I : Reader.getProfiles()) { + distributeIRToProfileLocationMap(I.second); + } +} + bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI, CallGraph *CG) { GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); @@ -2240,8 +2503,10 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, assert(SymbolMap.count(StringRef()) == 0 && "No empty StringRef should be added in SymbolMap"); - if (ReportProfileStaleness || PersistProfileStaleness) - MatchingManager->detectProfileMismatch(); + if (ReportProfileStaleness || PersistProfileStaleness || + SalvageStaleProfile) { + MatchingManager->runOnModule(); + } bool retval = false; for (auto *F : buildFunctionOrder(M, CG)) { @@ -2327,6 +2592,11 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) return emitAnnotations(F); return false; } +SampleProfileLoaderPass::SampleProfileLoaderPass( + std::string File, std::string RemappingFile, ThinOrFullLTOPhase LTOPhase, + IntrusiveRefCntPtr FS) + : ProfileFileName(File), ProfileRemappingFileName(RemappingFile), + LTOPhase(LTOPhase), FS(std::move(FS)) {} PreservedAnalyses SampleProfileLoaderPass::run(Module &M, ModuleAnalysisManager &AM) { @@ -2343,11 +2613,14 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M, return FAM.getResult(F); }; + if (!FS) + FS 
= vfs::getRealFileSystem(); + SampleProfileLoader SampleLoader( ProfileFileName.empty() ? SampleProfileFile : ProfileFileName, ProfileRemappingFileName.empty() ? SampleProfileRemappingFile : ProfileRemappingFileName, - LTOPhase, GetAssumptionCache, GetTTI, GetTLI); + LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI); if (!SampleLoader.doInitialization(M, &FAM)) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 4d4eb6f8ce80b45deb451b899c751a3cc6b11a59..78440758cf150c9c68fa5cbb05049d4256f4ed58 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -110,6 +110,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/HashBuilder.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -2059,6 +2060,7 @@ static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, static bool annotateAllFunctions( Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, + vfs::FileSystem &FS, function_ref LookupTLI, function_ref LookupBPI, function_ref LookupBFI, @@ -2066,8 +2068,8 @@ static bool annotateAllFunctions( LLVM_DEBUG(dbgs() << "Read in profile counters: "); auto &Ctx = M.getContext(); // Read the counter array from file. 
- auto ReaderOrErr = - IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); + auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS, + ProfileRemappingFileName); if (Error E = ReaderOrErr.takeError()) { handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { Ctx.diagnose( @@ -2249,15 +2251,18 @@ static bool annotateAllFunctions( return true; } -PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, - std::string RemappingFilename, - bool IsCS) +PGOInstrumentationUse::PGOInstrumentationUse( + std::string Filename, std::string RemappingFilename, bool IsCS, + IntrusiveRefCntPtr VFS) : ProfileFileName(std::move(Filename)), - ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { + ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS), + FS(std::move(VFS)) { if (!PGOTestProfileFile.empty()) ProfileFileName = PGOTestProfileFile; if (!PGOTestProfileRemappingFile.empty()) ProfileRemappingFileName = PGOTestProfileRemappingFile; + if (!FS) + FS = vfs::getRealFileSystem(); } PreservedAnalyses PGOInstrumentationUse::run(Module &M, @@ -2276,7 +2281,7 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M, auto *PSI = &AM.getResult(M); - if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, + if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS, LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp index 691ee00bd831e42d455d95a487ba30728a637f5f..31d62fbf0618b46b0e3b414d16c4fab52edf761f 100644 --- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp +++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp @@ -20,6 +20,7 @@ #include #include #include +#include using namespace llvm; #define DEBUG_TYPE "sample-profile-inference" @@ -1218,10 +1219,23 @@ void extractWeights(const ProfiParams &Params, MinCostMaxFlow 
&Network, #ifndef NDEBUG /// Verify that the provided block/jump weights are as expected. void verifyInput(const FlowFunction &Func) { - // Verify the entry block + // Verify entry and exit blocks assert(Func.Entry == 0 && Func.Blocks[0].isEntry()); + size_t NumExitBlocks = 0; for (size_t I = 1; I < Func.Blocks.size(); I++) { assert(!Func.Blocks[I].isEntry() && "multiple entry blocks"); + if (Func.Blocks[I].isExit()) + NumExitBlocks++; + } + assert(NumExitBlocks > 0 && "cannot find exit blocks"); + + // Verify that there are no parallel edges + for (auto &Block : Func.Blocks) { + std::unordered_set UniqueSuccs; + for (auto &Jump : Block.SuccJumps) { + auto It = UniqueSuccs.insert(Jump->Target); + assert(It.second && "input CFG contains parallel edges"); + } } // Verify CFG jumps for (auto &Block : Func.Blocks) { @@ -1304,8 +1318,26 @@ void verifyOutput(const FlowFunction &Func) { } // end of anonymous namespace -/// Apply the profile inference algorithm for a given function +/// Apply the profile inference algorithm for a given function and provided +/// profi options void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) { + // Check if the function has samples and assign initial flow values + bool HasSamples = false; + for (FlowBlock &Block : Func.Blocks) { + if (Block.Weight > 0) + HasSamples = true; + Block.Flow = Block.Weight; + } + for (FlowJump &Jump : Func.Jumps) { + if (Jump.Weight > 0) + HasSamples = true; + Jump.Flow = Jump.Weight; + } + + // Quit early for functions with a single block or ones w/o samples + if (Func.Blocks.size() <= 1 || !HasSamples) + return; + #ifndef NDEBUG // Verify the input data verifyInput(Func); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof new file mode 100644 index 0000000000000000000000000000000000000000..0a04602b2a0126e5d6a05958c1fa792de4bee149 --- /dev/null +++ 
b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof @@ -0,0 +1,18 @@ +[main]:30:0 + 0: 0 + 1.1: 0 + 3: 10 matched:10 + 4: 10 + 5: 10 bar_mismatch:10 + 7: 5 foo:5 + 8: 0 +[main:7 @ foo]:15:5 + 1: 5 + 2: 5 + 3: 5 inlinee_mismatch:5 +[bar]:10:10 + 1: 10 +[matched]:10:10 + 1: 10 +[main:7 @ foo:3 @ inlinee_mismatch]:5:5 + 1: 5 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof index 0bb17b2f8f6e408fa9b0cafcf0887b2433bc8f89..818a048b8cabb84ee73e25a3fe8590a4aabe54f8 100644 --- a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof +++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof @@ -5,9 +5,11 @@ main:30:0 4: 10 5: 10 bar_mismatch:10 8: 0 - 7: foo:10 + 7: foo:15 1: 5 2: 5 + 3: inlinee_mismatch:5 + 1: 5 bar:10:10 1: 10 matched:10:10 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching.prof new file mode 100644 index 0000000000000000000000000000000000000000..4d6241bb8568d4757e7fff285fa00a2c7da82269 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching.prof @@ -0,0 +1,28 @@ +main:1497:0 + 1: 0 + 2: 112 + 3: 112 bar:60 dummy_calltarget:50 + 4: 116 + 5: 0 + 7: 124 bar:124 + 9: 126 bar:126 + 6: foo:452 + 1: 112 + 2: 101 + 3: 13 + 4: 112 + 5: 101 bar:109 + 6: 13 bar:14 + !CFGChecksum: 563022570642068 + 8: foo:472 + 1: 117 + 2: 104 + 3: 13 + 4: 121 + 5: 104 bar:104 + 6: 14 bar:14 + !CFGChecksum: 563022570642068 + !CFGChecksum: 1125988587804525 +bar:491:491 + 1: 491 + !CFGChecksum: 4294967295 diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll index c88de5f56c743da172daa30a79f00be1ddb25c70..177329f9549792ab01203ed911681a647a9ea54e 100644 --- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll +++ 
b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll @@ -10,7 +10,7 @@ ; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/profile-context-tracker.prof -o %t.md5 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE -; RUN: llvm-profdata merge --sample --text --gen-cs-nested-profile %S/Inputs/profile-context-tracker.prof -o %t.prof +; RUN: llvm-profdata merge --sample --text --convert-sample-profile-layout=nest %S/Inputs/profile-context-tracker.prof -o %t.prof ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE ; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile diff --git a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll index da9c37937d2aea1bc00dc71ee463eff2fc10778d..030b5aa18816542a3c61797045908e36df84f32e 100644 --- a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE -; RUN: llvm-profdata merge --sample --text 
--gen-cs-nested-profile -generate-merged-base-profiles=0 %S/Inputs/csspgo-use-preinliner.prof -o %t.prof +; RUN: llvm-profdata merge --sample --text --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 %S/Inputs/csspgo-use-preinliner.prof -o %t.prof ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll new file mode 100644 index 0000000000000000000000000000000000000000..ef11652fd1a87b58c4b29234cc4e9b424716572b --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll @@ -0,0 +1,13 @@ +; REQUIRES: x86_64-linux +; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll +; RUN: FileCheck %s --input-file %t +; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD + +; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch-cs.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll +; RUN: FileCheck %s --input-file %t +; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD + + +; CHECK: (3/4) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch. 
+ +; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 3, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30} diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll index cf07974da27febc42d757a0946eaa1b6ebc45d67..8340c3b0e62d5eb865b263bd7a553ebabd9674eb 100644 --- a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll +++ b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll @@ -1,5 +1,5 @@ ; REQUIRES: x86_64-linux -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -S 2>%t -o %t.ll +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=0 -S 2>%t -o %t.ll ; RUN: FileCheck %s --input-file %t ; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD ; RUN: llc < %t.ll -filetype=obj -o %t.obj diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll new file mode 100644 index 0000000000000000000000000000000000000000..54dbc1d50d419d9484a90c5178181cab0d0b8d42 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll @@ -0,0 +1,342 @@ +; REQUIRES: x86_64-linux +; REQUIRES: asserts +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-matching.prof --salvage-stale-profile -S --debug-only=sample-profile 2>&1 | FileCheck %s + +; The profiled source code: + +; volatile int x = 1; +; __attribute__((noinline)) int bar(int p) { +; return p; +; } + +; __attribute__((always_inline)) int foo(int i, int p) { +; if (i % 10) return bar(p); +; else return bar(p + 1); +; } + +; int main() { +; for (int i = 0; i < 1000 * 1000; i++) { +; x += foo(i, x); +; x += 
bar(x); +; x += foo(i, x); +; x += bar(x); +; } +; } + +; The source code for the current build: + +; volatile int x = 1; +; __attribute__((noinline)) int bar(int p) { +; return p; +; } + +; __attribute__((always_inline)) int foo(int i, int p) { +; if (i % 10) return bar(p); +; else return bar(p + 1); +; } + +; int main() { +; if (x == 0) // code change +; return 0; // code change +; for (int i = 0; i < 1000 * 1000; i++) { +; x += foo(i, x); +; x += bar(x); +; if (i < 0) // code change +; return 0; // code change +; x += foo(i, x); +; x += bar(x); +; } +; } + + +; CHECK: Run stale profile matching for main + +; CHECK: Location is matched from 1 to 1 +; CHECK: Location is matched from 2 to 2 +; CHECK: Location is matched from 3 to 3 +; CHECK: Location is matched from 4 to 4 +; CHECK: Location is matched from 5 to 5 +; CHECK: Location is matched from 6 to 6 +; CHECK: Location is matched from 7 to 7 +; CHECK: Location is matched from 8 to 8 +; CHECK: Location is matched from 9 to 9 +; CHECK: Location is matched from 10 to 10 +; CHECK: Location is matched from 11 to 11 + +; CHECK: Callsite with callee:foo is matched from 13 to 6 +; CHECK: Location is rematched backwards from 7 to 0 +; CHECK: Location is rematched backwards from 8 to 1 +; CHECK: Location is rematched backwards from 9 to 2 +; CHECK: Location is rematched backwards from 10 to 3 +; CHECK: Location is rematched backwards from 11 to 4 +; CHECK: Callsite with callee:bar is matched from 14 to 7 +; CHECK: Callsite with callee:foo is matched from 15 to 8 +; CHECK: Callsite with callee:bar is matched from 16 to 9 + + +; CHECK: 2: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00) +; CHECK: 3: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00) +; CHECK: 4: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 116 - factor: 1.00) +; 
CHECK: 5: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 5, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00) +; CHECK: 1: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00) +; CHECK: 2: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 101 - factor: 1.00) +; CHECK: 5: %call.i3 = call i32 @bar(i32 noundef %1), !dbg ![[#]] - weight: 101 - factor: 1.00) +; CHECK: 3: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 13 - factor: 1.00) +; CHECK: 6: %call1.i6 = call i32 @bar(i32 noundef %add.i5), !dbg ![[#]] - weight: 13 - factor: 1.00) +; CHECK: 4: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00) +; CHECK: 14: %call2 = call i32 @bar(i32 noundef %3), !dbg ![[#]] - weight: 124 - factor: 1.00) +; CHECK: 8: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00) +; CHECK: 1: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 117 - factor: 1.00) +; CHECK: 2: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 104 - factor: 1.00) +; CHECK: 5: %call.i = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 104 - factor: 1.00) +; CHECK: 3: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 13 - factor: 1.00) +; CHECK: 6: %call1.i = call i32 @bar(i32 noundef %add.i), !dbg ![[#]] - weight: 14 - factor: 1.00) +; CHECK: 4: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 121 - factor: 1.00) +; CHECK: 16: %call9 = call i32 @bar(i32 noundef %7), !dbg ![[#]] - weight: 126 - factor: 1.00) +; CHECK: 9: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 9, i32 0, i64 -1), !dbg ![[#]] - 
weight: 112 - factor: 1.00) +; CHECK: 10: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 10, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00) +; CHECK: 11: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 11, i32 0, i64 -1), !dbg ![[#]] - weight: 116 - factor: 1.00) +; CHECK: 1: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00) + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@x = dso_local global i32 1, align 4, !dbg !0 + +; Function Attrs: noinline nounwind uwtable +define dso_local i32 @bar(i32 noundef %p) #0 !dbg !16 { +entry: + call void @llvm.dbg.value(metadata i32 %p, metadata !20, metadata !DIExpression()), !dbg !21 + call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !22 + ret i32 %p, !dbg !23 +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: alwaysinline nounwind uwtable +define dso_local i32 @foo(i32 noundef %i, i32 noundef %p) #2 !dbg !24 { +entry: + call void @llvm.dbg.value(metadata i32 %i, metadata !28, metadata !DIExpression()), !dbg !30 + call void @llvm.dbg.value(metadata i32 %p, metadata !29, metadata !DIExpression()), !dbg !30 + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg !31 + %rem = srem i32 %i, 10, !dbg !33 + %tobool = icmp ne i32 %rem, 0, !dbg !33 + br i1 %tobool, label %if.then, label %if.else, !dbg !34 + +if.then: ; preds = %entry + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg !35 + %call = call i32 @bar(i32 noundef %p), !dbg !36 + br label %return, !dbg !38 + +if.else: ; preds = %entry + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg !39 + %add = add nsw i32 %p, 1, !dbg !40 
+ %call1 = call i32 @bar(i32 noundef %add), !dbg !41 + br label %return, !dbg !43 + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ], !dbg !44 + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg !45 + ret i32 %retval.0, !dbg !45 +} + +; Function Attrs: nounwind uwtable +define dso_local i32 @main() #3 !dbg !46 { +entry: + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !52 + %0 = load volatile i32, ptr @x, align 4, !dbg !52, !tbaa !54 + %cmp = icmp eq i32 %0, 0, !dbg !58 + br i1 %cmp, label %if.then, label %if.end, !dbg !59 + +if.then: ; preds = %entry + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !60 + br label %for.end, !dbg !60 + +if.end: ; preds = %entry + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !61 + call void @llvm.dbg.value(metadata i32 0, metadata !50, metadata !DIExpression()), !dbg !62 + br label %for.cond, !dbg !63 + +for.cond: ; preds = %if.end6, %if.end + %i.0 = phi i32 [ 0, %if.end ], [ %inc, %if.end6 ], !dbg !64 + call void @llvm.dbg.value(metadata i32 %i.0, metadata !50, metadata !DIExpression()), !dbg !62 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !65 + %cmp1 = icmp slt i32 %i.0, 1000000, !dbg !67 + br i1 %cmp1, label %for.body, label %for.cond.cleanup, !dbg !68 + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 5, i32 0, i64 -1), !dbg !68 + br label %cleanup, !dbg !68 + +for.body: ; preds = %for.cond + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !69 + %1 = load volatile i32, ptr @x, align 4, !dbg !71, !tbaa !54 + %call = call i32 @foo(i32 noundef %i.0, i32 noundef %1), !dbg !72 + %2 = load volatile i32, ptr @x, align 4, !dbg !74, !tbaa !54 + %add = add nsw i32 %2, %call, !dbg !74 + store volatile i32 %add, ptr @x, align 4, 
!dbg !74, !tbaa !54 + %3 = load volatile i32, ptr @x, align 4, !dbg !75, !tbaa !54 + %call2 = call i32 @bar(i32 noundef %3), !dbg !76 + %4 = load volatile i32, ptr @x, align 4, !dbg !78, !tbaa !54 + %add3 = add nsw i32 %4, %call2, !dbg !78 + store volatile i32 %add3, ptr @x, align 4, !dbg !78, !tbaa !54 + br i1 false, label %if.then5, label %if.end6, !dbg !79 + +if.then5: ; preds = %for.body + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !80 + br label %cleanup, !dbg !80 + +if.end6: ; preds = %for.body + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg !82 + %5 = load volatile i32, ptr @x, align 4, !dbg !83, !tbaa !54 + %call7 = call i32 @foo(i32 noundef %i.0, i32 noundef %5), !dbg !84 + %6 = load volatile i32, ptr @x, align 4, !dbg !86, !tbaa !54 + %add8 = add nsw i32 %6, %call7, !dbg !86 + store volatile i32 %add8, ptr @x, align 4, !dbg !86, !tbaa !54 + %7 = load volatile i32, ptr @x, align 4, !dbg !87, !tbaa !54 + %call9 = call i32 @bar(i32 noundef %7), !dbg !88 + %8 = load volatile i32, ptr @x, align 4, !dbg !90, !tbaa !54 + %add10 = add nsw i32 %8, %call9, !dbg !90 + store volatile i32 %add10, ptr @x, align 4, !dbg !90, !tbaa !54 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 9, i32 0, i64 -1), !dbg !91 + %inc = add nsw i32 %i.0, 1, !dbg !91 + call void @llvm.dbg.value(metadata i32 %inc, metadata !50, metadata !DIExpression()), !dbg !62 + br label %for.cond, !dbg !92, !llvm.loop !93 + +cleanup: ; preds = %if.then5, %for.cond.cleanup + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 10, i32 0, i64 -1), !dbg !96 + br label %for.end + +for.end: ; preds = %cleanup, %if.then + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 11, i32 0, i64 -1), !dbg !97 + ret i32 0, !dbg !97 +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #4 + +; Function 
Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #5 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.value(metadata, metadata, metadata) #6 + +attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #2 = { alwaysinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #3 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #4 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #5 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #6 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9, !10, !11} +!llvm.ident = !{!12} +!llvm.pseudo_probe_desc = !{!13, !14, !15} + +!0 = 
!DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 17.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "test.c", directory: "path") +!4 = !{!0} +!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6) +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 7, !"Dwarf Version", i32 5} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{i32 7, !"uwtable", i32 2} +!11 = !{i32 7, !"debug-info-assignment-tracking", i1 true} +!12 = !{!"clang version 17.0.0"} +!13 = !{i64 -2012135647395072713, i64 4294967295, !"bar"} +!14 = !{i64 6699318081062747564, i64 563022570642068, !"foo"} +!15 = !{i64 -2624081020897602054, i64 1126158552146340, !"main"} +!16 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 2, type: !17, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !19) +!17 = !DISubroutineType(types: !18) +!18 = !{!6, !6} +!19 = !{!20} +!20 = !DILocalVariable(name: "p", arg: 1, scope: !16, file: !3, line: 2, type: !6) +!21 = !DILocation(line: 0, scope: !16) +!22 = !DILocation(line: 3, column: 10, scope: !16) +!23 = !DILocation(line: 3, column: 3, scope: !16) +!24 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 6, type: !25, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !27) +!25 = !DISubroutineType(types: !26) +!26 = !{!6, !6, !6} +!27 = !{!28, !29} +!28 = !DILocalVariable(name: "i", arg: 1, scope: !24, file: !3, line: 6, type: !6) +!29 = !DILocalVariable(name: "p", arg: 2, scope: 
!24, file: !3, line: 6, type: !6) +!30 = !DILocation(line: 0, scope: !24) +!31 = !DILocation(line: 7, column: 6, scope: !32) +!32 = distinct !DILexicalBlock(scope: !24, file: !3, line: 7, column: 6) +!33 = !DILocation(line: 7, column: 8, scope: !32) +!34 = !DILocation(line: 7, column: 6, scope: !24) +!35 = !DILocation(line: 7, column: 26, scope: !32) +!36 = !DILocation(line: 7, column: 22, scope: !37) +!37 = !DILexicalBlockFile(scope: !32, file: !3, discriminator: 186646575) +!38 = !DILocation(line: 7, column: 14, scope: !32) +!39 = !DILocation(line: 8, column: 19, scope: !32) +!40 = !DILocation(line: 8, column: 21, scope: !32) +!41 = !DILocation(line: 8, column: 15, scope: !42) +!42 = !DILexicalBlockFile(scope: !32, file: !3, discriminator: 186646583) +!43 = !DILocation(line: 8, column: 8, scope: !32) +!44 = !DILocation(line: 0, scope: !32) +!45 = !DILocation(line: 9, column: 1, scope: !24) +!46 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !47, scopeLine: 11, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !49) +!47 = !DISubroutineType(types: !48) +!48 = !{!6} +!49 = !{!50} +!50 = !DILocalVariable(name: "i", scope: !51, file: !3, line: 14, type: !6) +!51 = distinct !DILexicalBlock(scope: !46, file: !3, line: 14, column: 3) +!52 = !DILocation(line: 12, column: 6, scope: !53) +!53 = distinct !DILexicalBlock(scope: !46, file: !3, line: 12, column: 6) +!54 = !{!55, !55, i64 0} +!55 = !{!"int", !56, i64 0} +!56 = !{!"omnipotent char", !57, i64 0} +!57 = !{!"Simple C/C++ TBAA"} +!58 = !DILocation(line: 12, column: 8, scope: !53) +!59 = !DILocation(line: 12, column: 6, scope: !46) +!60 = !DILocation(line: 13, column: 5, scope: !53) +!61 = !DILocation(line: 14, column: 11, scope: !51) +!62 = !DILocation(line: 0, scope: !51) +!63 = !DILocation(line: 14, column: 7, scope: !51) +!64 = !DILocation(line: 14, scope: !51) +!65 = !DILocation(line: 14, column: 18, scope: !66) +!66 = distinct 
!DILexicalBlock(scope: !51, file: !3, line: 14, column: 3) +!67 = !DILocation(line: 14, column: 20, scope: !66) +!68 = !DILocation(line: 14, column: 3, scope: !51) +!69 = !DILocation(line: 15, column: 15, scope: !70) +!70 = distinct !DILexicalBlock(scope: !66, file: !3, line: 14, column: 40) +!71 = !DILocation(line: 15, column: 18, scope: !70) +!72 = !DILocation(line: 15, column: 11, scope: !73) +!73 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646639) +!74 = !DILocation(line: 15, column: 8, scope: !70) +!75 = !DILocation(line: 16, column: 15, scope: !70) +!76 = !DILocation(line: 16, column: 11, scope: !77) +!77 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646647) +!78 = !DILocation(line: 16, column: 8, scope: !70) +!79 = !DILocation(line: 17, column: 9, scope: !70) +!80 = !DILocation(line: 18, column: 8, scope: !81) +!81 = distinct !DILexicalBlock(scope: !70, file: !3, line: 17, column: 9) +!82 = !DILocation(line: 19, column: 15, scope: !70) +!83 = !DILocation(line: 19, column: 18, scope: !70) +!84 = !DILocation(line: 19, column: 11, scope: !85) +!85 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646655) +!86 = !DILocation(line: 19, column: 8, scope: !70) +!87 = !DILocation(line: 20, column: 15, scope: !70) +!88 = !DILocation(line: 20, column: 11, scope: !89) +!89 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646663) +!90 = !DILocation(line: 20, column: 8, scope: !70) +!91 = !DILocation(line: 14, column: 36, scope: !66) +!92 = !DILocation(line: 14, column: 3, scope: !66) +!93 = distinct !{!93, !68, !94, !95} +!94 = !DILocation(line: 21, column: 3, scope: !51) +!95 = !{!"llvm.loop.mustprogress"} +!96 = !DILocation(line: 0, scope: !46) +!97 = !DILocation(line: 22, column: 1, scope: !46) diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext new file mode 100644 index 
0000000000000000000000000000000000000000..5cd880b63baad65c011a682f924440cfbd4f28b7 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext @@ -0,0 +1,20 @@ +[baz]:150:10 + 1: 10 + 3: 20 + 5: 20 foo:20 +[foo]:102:1 + 1: 1 + 3: 1 +[main]:91:1 + 4: 1 + 4.2: 1 + 7: 1 + 9: 3 bar:2 foo:1 + 10: 3 baz:2 foo:1 +[main:10 @ foo]:2:1 + 3: 1 bar:1 + 4: 1 +[bar]:1:1 + 1: 1 +[main:10 @ foo:3 @ bar]:1:1 + 1: 1 diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext new file mode 100644 index 0000000000000000000000000000000000000000..46564f65121e6c6c40e0296c7dcce16ea7bde985 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext @@ -0,0 +1,44 @@ +baz:160:10 + 1: 10 + 3: 20 + 5: foo:30 + 1: 20 + 3: bar:10 + 1: 10 + !CFGChecksum: 4 + !Attributes: 4 + !CFGChecksum: 3 + !Attributes: 3 + !CFGChecksum: 1 + !Attributes: 1 +main:110:1 + 4: 1 + 4.2: 1 + 7: 1 + 9: 3 bar:2 foo:1 + 10: foo:2 + 4: 1 + 3: bar:1 + 1: 1 + !CFGChecksum: 4 + !Attributes: 4 + !CFGChecksum: 3 + !Attributes: 3 + 10: baz:20 + 10: 1 + 6: bar:3 + 1: 2 + 7: 1 + !CFGChecksum: 4 + !Attributes: 4 + !CFGChecksum: 2 + !Attributes: 2 +foo:102:1 + 1: 1 + 3: 1 + !CFGChecksum: 3 + !Attributes: 3 +bar:1:1 + 1: 1 + !CFGChecksum: 4 + !Attributes: 4 diff --git a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test index d458d7fad1be61c42791d52071f10a7c3214ae07..7b01324219115ced4f29670f6a91c54b74d7860c 100644 --- a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test +++ b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test @@ -1,14 +1,14 @@ -RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0 +RUN: llvm-profdata merge --sample --text -output=%t.proftext 
%S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -RUN: llvm-profdata merge --sample --text -output=%t.probe.proftext %S/Inputs/cs-sample-preinline-probe.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0 +RUN: llvm-profdata merge --sample --text -output=%t.probe.proftext %S/Inputs/cs-sample-preinline-probe.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 RUN: FileCheck %s < %t.probe.proftext --match-full-lines --strict-whitespace -check-prefix=PROBE -RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0 +RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 RUN: llvm-profdata merge --sample --text -output=%t2.proftext %t.profbin RUN: FileCheck %s < %t2.proftext --match-full-lines --strict-whitespace RUN: llvm-profdata show --sample -show-sec-info-only %t.profbin | FileCheck %s -check-prefix=PREINLINE -RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1 +RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=1 RUN: FileCheck %s < %t3.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT -RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1 +RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=1 RUN: 
llvm-profdata show -sample -detailed-summary %S/Inputs/cs-sample-preinline.proftext | FileCheck %s -check-prefix=SUMMARY RUN: llvm-profdata show -sample -detailed-summary %t2.profbin | FileCheck %s -check-prefix=SUMMARY-NEST RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -check-prefix=SUMMARY-NEST diff --git a/llvm/test/tools/llvm-profdata/sample-flatten-profile.test b/llvm/test/tools/llvm-profdata/sample-flatten-profile.test new file mode 100644 index 0000000000000000000000000000000000000000..90effcb25190ebcbdbee40319e694b71f7ce6e7e --- /dev/null +++ b/llvm/test/tools/llvm-profdata/sample-flatten-profile.test @@ -0,0 +1,50 @@ +; RUN: llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %S/Inputs/sample-flatten-profile.proftext -o - | FileCheck %s --match-full-lines --strict-whitespace +; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/sample-flatten-profile.proftext -o %t2 && llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %t2 -o - | FileCheck %s --match-full-lines --strict-whitespace + +; RUN: llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %S/Inputs/sample-flatten-profile-cs.proftext -o - | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=CHECK-CS +; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/sample-flatten-profile-cs.proftext -o %t2 && llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %t2 -o - | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=CHECK-CS + +; CHECK:baz:169:10 +; CHECK-NEXT: 1: 10 +; CHECK-NEXT: 3: 20 +; CHECK-NEXT: 5: 20 foo:20 +; CHECK-NEXT: 6: 2 bar:2 +; CHECK-NEXT: 10: 1 +; CHECK-NEXT: !CFGChecksum: 1 +; CHECK-NEXT: !Attributes: 1 +; CHECK-NEXT:foo:134:21 +; CHECK-NEXT: 1: 21 +; CHECK-NEXT: 3: 12 bar:11 +; CHECK-NEXT: 4: 1 +; CHECK-NEXT: !CFGChecksum: 3 +; CHECK-NEXT: !Attributes: 3 +; CHECK-NEXT:main:91:1 +; CHECK-NEXT: 4: 1 +; CHECK-NEXT: 4.2: 1 +; CHECK-NEXT: 7: 
1 +; CHECK-NEXT: 9: 3 bar:2 foo:1 +; CHECK-NEXT: 10: 3 baz:2 foo:1 +; CHECK-NEXT: !CFGChecksum: 2 +; CHECK-NEXT: !Attributes: 2 +; CHECK-NEXT:bar:15:14 +; CHECK-NEXT: 1: 14 +; CHECK-NEXT: 7: 1 +; CHECK-NEXT: !CFGChecksum: 4 +; CHECK-NEXT: !Attributes: 4 + +; CHECK-CS:baz:150:10 +; CHECK-CS-NEXT: 1: 10 +; CHECK-CS-NEXT: 3: 20 +; CHECK-CS-NEXT: 5: 20 foo:20 +; CHECK-CS-NEXT:foo:104:2 +; CHECK-CS-NEXT: 1: 1 +; CHECK-CS-NEXT: 3: 2 bar:1 +; CHECK-CS-NEXT: 4: 1 +; CHECK-CS-NEXT:main:91:1 +; CHECK-CS-NEXT: 4: 1 +; CHECK-CS-NEXT: 4.2: 1 +; CHECK-CS-NEXT: 7: 1 +; CHECK-CS-NEXT: 9: 3 bar:2 foo:1 +; CHECK-CS-NEXT: 10: 3 baz:2 foo:1 +; CHECK-CS-NEXT:bar:2:2 +; CHECK-CS-NEXT: 1: 2 diff --git a/llvm/test/tools/llvm-profgen/profile-density.test b/llvm/test/tools/llvm-profgen/profile-density.test index 1d7c2f9cd5dfe3cd0f4d8fbec86f2baaed46b718..0eb83838d16e7063130f7f3a40e77720b6d32516 100644 --- a/llvm/test/tools/llvm-profgen/profile-density.test +++ b/llvm/test/tools/llvm-profgen/profile-density.test @@ -4,7 +4,7 @@ ; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density-cs.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t3 --show-density -hot-function-density-threshold=1 &> %t4 ; RUN: FileCheck %s --input-file %t4 --check-prefix=CHECK-DENSITY-CS -;CHECK-DENSITY: AutoFDO is estimated to optimize better with 3.1x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples. +;CHECK-DENSITY: Sample PGO is estimated to optimize better with 3.1x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples. 
;CHECK-DENSITY: Minimum profile density for hot functions with top 99.00% total samples: 3.2 ;CHECK-DENSITY-CS: Minimum profile density for hot functions with top 99.00% total samples: 128.3 diff --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp index 7b71d5ad45547920c75a37397a940e5950f5e672..733a58158afc74a2371e654d0a51a47e37bd8be6 100644 --- a/llvm/tools/llvm-cov/CodeCoverage.cpp +++ b/llvm/tools/llvm-cov/CodeCoverage.cpp @@ -439,8 +439,9 @@ std::unique_ptr CodeCoverageTool::load() { if (modifiedTimeGT(ObjectFilename, PGOFilename)) warning("profile data may be out of date - object is newer", ObjectFilename); + auto FS = vfs::getRealFileSystem(); auto CoverageOrErr = - CoverageMapping::load(ObjectFilenames, PGOFilename, CoverageArches, + CoverageMapping::load(ObjectFilenames, PGOFilename, *FS, CoverageArches, ViewOpts.CompilationDirectory, BIDFetcher.get()); if (Error E = CoverageOrErr.takeError()) { error("Failed to load coverage: " + toString(std::move(E))); diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index c8e5e6d1ad68f1f4a65f0f3752f28f260217ded0..afe2769643a891b4603fe2808c6878bcc2fd5a10 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -35,6 +35,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Threading.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include @@ -226,7 +227,8 @@ static void overlapInput(const std::string &BaseFilename, OverlapStats &Overlap, const OverlapFuncFilters &FuncFilter, raw_fd_ostream &OS, bool IsCS) { - auto ReaderOrErr = InstrProfReader::create(TestFilename); + auto FS = vfs::getRealFileSystem(); + auto ReaderOrErr = InstrProfReader::create(TestFilename, *FS); if (Error E = ReaderOrErr.takeError()) { // Skip the empty profiles by returning sliently. 
instrprof_error IPE = InstrProfError::take(std::move(E)); @@ -298,7 +300,8 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, return; } - auto ReaderOrErr = InstrProfReader::create(Input.Filename, Correlator); + auto FS = vfs::getRealFileSystem(); + auto ReaderOrErr = InstrProfReader::create(Input.Filename, *FS, Correlator); if (Error E = ReaderOrErr.takeError()) { // Skip the empty profiles by returning sliently. instrprof_error IPE = InstrProfError::take(std::move(E)); @@ -838,8 +841,9 @@ static void supplementInstrProfile( // Read sample profile. LLVMContext Context; + auto FS = vfs::getRealFileSystem(); auto ReaderOrErr = sampleprof::SampleProfileReader::create( - SampleFilename.str(), Context, FSDiscriminatorPassOption); + SampleFilename.str(), Context, *FS, FSDiscriminatorPassOption); if (std::error_code EC = ReaderOrErr.getError()) exitWithErrorCode(EC, SampleFilename); auto Reader = std::move(ReaderOrErr.get()); @@ -963,10 +967,11 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, StringRef OutputFilename, ProfileFormat OutputFormat, StringRef ProfileSymbolListFile, bool CompressAllSections, - bool UseMD5, bool GenPartialProfile, bool GenCSNestedProfile, + bool UseMD5, bool GenPartialProfile, + SampleProfileLayout ProfileLayout, bool SampleMergeColdContext, bool SampleTrimColdContext, bool SampleColdContextFrameDepth, FailureMode FailMode, - bool DropProfileSymbolList) { + bool DropProfileSymbolList, size_t OutputSizeLimit) { using namespace sampleprof; SampleProfileMap ProfileMap; SmallVector, 5> Readers; @@ -975,7 +980,8 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, std::optional ProfileIsProbeBased; std::optional ProfileIsCS; for (const auto &Input : Inputs) { - auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, + auto FS = vfs::getRealFileSystem(); + auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, *FS, 
FSDiscriminatorPassOption); if (std::error_code EC = ReaderOrErr.getError()) { warnOrExitGivenError(FailMode, EC, Input.Filename); @@ -1042,9 +1048,12 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, SampleMergeColdContext, SampleColdContextFrameDepth, false); } - if (ProfileIsCS && GenCSNestedProfile) { - CSProfileConverter CSConverter(ProfileMap); - CSConverter.convertProfiles(); + if (ProfileLayout == llvm::sampleprof::SPL_Flat) { + ProfileConverter::flattenProfile(ProfileMap, FunctionSamples::ProfileIsCS); + ProfileIsCS = FunctionSamples::ProfileIsCS = false; + } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) { + ProfileConverter CSConverter(ProfileMap); + CSConverter.convertCSProfiles(); ProfileIsCS = FunctionSamples::ProfileIsCS = false; } @@ -1059,7 +1068,10 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, auto Buffer = getInputFileBuf(ProfileSymbolListFile); handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList, CompressAllSections, UseMD5, GenPartialProfile); - if (std::error_code EC = Writer->write(ProfileMap)) + + // If OutputSizeLimit is 0 (default), it is the same as write(). + if (std::error_code EC = + Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit)) exitWithErrorCode(std::move(EC)); } @@ -1202,6 +1214,11 @@ static int merge_main(int argc, const char *argv[]) { "sample-frame-depth-for-cold-context", cl::init(1), cl::desc("Keep the last K frames while merging cold profile. 1 means the " "context-less base profile")); + cl::opt OutputSizeLimit( + "output-size-limit", cl::init(0), cl::Hidden, + cl::desc("Trim cold functions until profile size is below specified " + "limit in bytes. 
This uses a heursitic and functions may be " + "excessively trimmed")); cl::opt GenPartialProfile( "gen-partial-profile", cl::init(false), cl::Hidden, cl::desc("Generate a partial profile (only meaningful for -extbinary)")); @@ -1227,9 +1244,15 @@ static int merge_main(int argc, const char *argv[]) { "instr-prof-cold-threshold", cl::init(0), cl::Hidden, cl::desc("User specified cold threshold for instr profile which will " "override the cold threshold got from profile summary. ")); - cl::opt GenCSNestedProfile( - "gen-cs-nested-profile", cl::Hidden, cl::init(false), - cl::desc("Generate nested function profiles for CSSPGO")); + cl::opt ProfileLayout( + "convert-sample-profile-layout", + cl::desc("Convert the generated profile to a profile with a new layout"), + cl::init(SPL_None), + cl::values( + clEnumValN(SPL_Nest, "nest", + "Nested profile, the input should be CS flat profile"), + clEnumValN(SPL_Flat, "flat", + "Profile with nested inlinee flatten out"))); cl::opt DebugInfoFilename( "debug-info", cl::init(""), cl::desc("Use the provided debug info to correlate the raw profile.")); @@ -1284,11 +1307,12 @@ static int merge_main(int argc, const char *argv[]) { OutputFilename, OutputFormat, OutputSparse, NumThreads, FailureMode, ProfiledBinary); else - mergeSampleProfile( - WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, - ProfileSymbolListFile, CompressAllSections, UseMD5, GenPartialProfile, - GenCSNestedProfile, SampleMergeColdContext, SampleTrimColdContext, - SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList); + mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, + OutputFormat, ProfileSymbolListFile, CompressAllSections, + UseMD5, GenPartialProfile, ProfileLayout, + SampleMergeColdContext, SampleTrimColdContext, + SampleColdContextFrameDepth, FailureMode, + DropProfileSymbolList, OutputSizeLimit); return 0; } @@ -2189,12 +2213,13 @@ std::error_code SampleOverlapAggregator::loadProfiles() { using namespace sampleprof; 
LLVMContext Context; - auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context, + auto FS = vfs::getRealFileSystem(); + auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context, *FS, FSDiscriminatorPassOption); if (std::error_code EC = BaseReaderOrErr.getError()) exitWithErrorCode(EC, BaseFilename); - auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context, + auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context, *FS, FSDiscriminatorPassOption); if (std::error_code EC = TestReaderOrErr.getError()) exitWithErrorCode(EC, TestFilename); @@ -2372,7 +2397,8 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts, exitWithError("JSON output is not supported for instr profiles"); if (SFormat == ShowFormat::Yaml) exitWithError("YAML output is not supported for instr profiles"); - auto ReaderOrErr = InstrProfReader::create(Filename); + auto FS = vfs::getRealFileSystem(); + auto ReaderOrErr = InstrProfReader::create(Filename, *FS); std::vector Cutoffs = std::move(DetailedSummaryCutoffs); if (ShowDetailedSummary && Cutoffs.empty()) { Cutoffs = ProfileSummaryBuilder::DefaultCutoffs; @@ -2742,8 +2768,9 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts, exitWithError("YAML output is not supported for sample profiles"); using namespace sampleprof; LLVMContext Context; - auto ReaderOrErr = - SampleProfileReader::create(Filename, Context, FSDiscriminatorPassOption); + auto FS = vfs::getRealFileSystem(); + auto ReaderOrErr = SampleProfileReader::create(Filename, Context, *FS, + FSDiscriminatorPassOption); if (std::error_code EC = ReaderOrErr.getError()) exitWithErrorCode(EC, Filename); diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp index dbc5bc7327d5cd751567317ee89d9e1bf1cbd5d4..330a8f33de21c79c06a3f07d1e2a816776709476 100644 --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp 
@@ -55,6 +55,12 @@ static cl::opt SamplePreInlineReplay( cl::desc( "Replay previous inlining and adjust context profile accordingly")); +static cl::opt CSPreinlMultiplierForPrevInl( + "csspgo-preinliner-multiplier-for-previous-inlining", cl::Hidden, + cl::init(100), + cl::desc( + "Multiplier to bump up callsite threshold for previous inlining.")); + CSPreInliner::CSPreInliner(SampleContextTracker &Tracker, ProfiledBinary &Binary, ProfileSummary *Summary) : UseContextCost(UseContextCostForPreInliner), @@ -74,7 +80,12 @@ CSPreInliner::CSPreInliner(SampleContextTracker &Tracker, std::vector CSPreInliner::buildTopDownOrder() { std::vector Order; - ProfiledCallGraph ProfiledCG(ContextTracker); + // Trim cold edges to get a more stable call graph. This allows for a more + // stable top-down order which in turns helps the stablity of the generated + // profile from run to run. + uint64_t ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( + (Summary->getDetailedSummary())); + ProfiledCallGraph ProfiledCG(ContextTracker, ColdCountThreshold); // Now that we have a profiled call graph, construct top-down order // by building up SCC and reversing SCC order. @@ -146,11 +157,12 @@ uint32_t CSPreInliner::getFuncSize(const ContextTrieNode *ContextNode) { } bool CSPreInliner::shouldInline(ProfiledInlineCandidate &Candidate) { + bool WasInlined = + Candidate.CalleeSamples->getContext().hasAttribute(ContextWasInlined); // If replay inline is requested, simply follow the inline decision of the // profiled binary. if (SamplePreInlineReplay) - return Candidate.CalleeSamples->getContext().hasAttribute( - ContextWasInlined); + return WasInlined; unsigned int SampleThreshold = SampleColdCallSiteThreshold; uint64_t ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( @@ -177,6 +189,12 @@ bool CSPreInliner::shouldInline(ProfiledInlineCandidate &Candidate) { // want any inlining for cold callsites. 
SampleThreshold = SampleHotCallSiteThreshold * NormalizedHotness * 100 + SampleColdCallSiteThreshold + 1; + // Bump up the threshold to favor previous compiler inline decision. The + // compiler has more insight and knowledge about functions based on their IR + // and attribures and should be able to make a more reasonable inline + // decision. + if (WasInlined) + SampleThreshold *= CSPreinlMultiplierForPrevInl; } return (Candidate.SizeCost < SampleThreshold); diff --git a/llvm/tools/llvm-profgen/CSPreInliner.h b/llvm/tools/llvm-profgen/CSPreInliner.h index 09dd2dec1149c599e7989e03a3868488a59bfbb1..4d848aafdab914da33ea9155e446c8ef257010bf 100644 --- a/llvm/tools/llvm-profgen/CSPreInliner.h +++ b/llvm/tools/llvm-profgen/CSPreInliner.h @@ -41,6 +41,13 @@ struct ProfiledInlineCandidate { struct ProfiledCandidateComparer { bool operator()(const ProfiledInlineCandidate &LHS, const ProfiledInlineCandidate &RHS) { + // Always prioritize inlining zero-sized functions as they do not affect the + // size budget. This could happen when all of the callee's code is gone and + // only pseudo probes are left. + if ((LHS.SizeCost == 0 || RHS.SizeCost == 0) && + (LHS.SizeCost != RHS.SizeCost)) + return RHS.SizeCost == 0; + if (LHS.CallsiteCount != RHS.CallsiteCount) return LHS.CallsiteCount < RHS.CallsiteCount; diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index dfc42a5f4e0215c31f75d8e41321f9391cef3a1f..6cb6b1e8f71312aea0ec322c60c75fa9562bd1a4 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -188,7 +188,7 @@ void ProfileGeneratorBase::showDensitySuggestion(double Density) { "set too low. 
Please check your command.\n"; else if (Density < HotFunctionDensityThreshold) WithColor::warning() - << "AutoFDO is estimated to optimize better with " + << "Sample PGO is estimated to optimize better with " << format("%.1f", HotFunctionDensityThreshold / Density) << "x more samples. Please consider increasing sampling rate or " "profiling for longer duration to get more samples.\n"; @@ -1026,8 +1026,8 @@ void CSProfileGenerator::postProcessProfiles() { calculateAndShowDensity(ContextLessProfiles); if (GenCSNestedProfile) { - CSProfileConverter CSConverter(ProfileMap); - CSConverter.convertProfiles(); + ProfileConverter CSConverter(ProfileMap); + CSConverter.convertCSProfiles(); FunctionSamples::ProfileIsCS = false; } } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 00e9d5025876931bb70b42db5a92ede3da99f905..5d9a82ccf3e6127fad65f52af4232fc7ebda2fb5 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -163,12 +163,13 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway( } ProfiledBinary::ProfiledBinary(const StringRef ExeBinPath, - const StringRef DebugBinPath) - : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), ProEpilogTracker(this), + const StringRef DebugBinPath) + : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), + SymbolizerOpts(getSymbolizerOpts()), ProEpilogTracker(this), + Symbolizer(std::make_unique(SymbolizerOpts)), TrackFuncContextSize(EnableCSPreInliner && UseContextCostForPreInliner) { // Point to executable binary if debug info binary is not specified. SymbolizerPath = DebugBinPath.empty() ? 
ExeBinPath : DebugBinPath; - setupSymbolizer(); if (InferMissingFrames) MissingContextInferrer = std::make_unique(this); load(); @@ -214,9 +215,7 @@ void ProfiledBinary::load() { exitWithError("not a valid Elf image", Path); TheTriple = Obj->makeTriple(); - // Current only support X86 - if (!TheTriple.isX86()) - exitWithError("unsupported target", TheTriple.getTriple()); + LLVM_DEBUG(dbgs() << "Loading " << Path << "\n"); // Find the preferred load address for text sections. @@ -840,7 +839,7 @@ void ProfiledBinary::populateSymbolListFromDWARF( SymbolList.add(I.second.getFuncName()); } -void ProfiledBinary::setupSymbolizer() { +symbolize::LLVMSymbolizer::Options ProfiledBinary::getSymbolizerOpts() const { symbolize::LLVMSymbolizer::Options SymbolizerOpts; SymbolizerOpts.PrintFunctions = DILineInfoSpecifier::FunctionNameKind::LinkageName; @@ -849,7 +848,7 @@ void ProfiledBinary::setupSymbolizer() { SymbolizerOpts.UseSymbolTable = false; SymbolizerOpts.RelativeAddresses = false; SymbolizerOpts.DWPName = DWPPath; - Symbolizer = std::make_unique(SymbolizerOpts); + return SymbolizerOpts; } SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP, diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index cdbaec740b4a632795d30b315539d7a770ad507a..d44a77a8b6a3da5b127457ae1a5ab9c5d7a39c13 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -188,10 +188,12 @@ class ProfiledBinary { std::string Path; // Path of the debug info binary. std::string DebugBinaryPath; - // Path of symbolizer path which should be pointed to binary with debug info. - StringRef SymbolizerPath; // The target triple. Triple TheTriple; + // Path of symbolizer path which should be pointed to binary with debug info. 
+ StringRef SymbolizerPath; + // Options used to configure the symbolizer + symbolize::LLVMSymbolizer::Options SymbolizerOpts; // The runtime base address that the first executable segment is loaded at. uint64_t BaseAddress = 0; // The runtime base address that the first loadabe segment is loaded at. @@ -302,7 +304,7 @@ class ProfiledBinary { // Set up disassembler and related components. void setUpDisassembler(const ELFObjectFileBase *Obj); - void setupSymbolizer(); + symbolize::LLVMSymbolizer::Options getSymbolizerOpts() const; // Load debug info of subprograms from DWARF section. void loadSymbolsFromDWARF(ObjectFile &Obj); @@ -493,7 +495,7 @@ public: SampleContextFrameVector getFrameLocationStack(uint64_t Address, bool UseProbeDiscriminator = false) { InstructionPointer IP(this, Address); - return symbolize(IP, true, UseProbeDiscriminator); + return symbolize(IP, SymbolizerOpts.UseSymbolTable, UseProbeDiscriminator); } const SampleContextFrameVector & diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp index 596882c1b93fa2fa1c888d579a00d41da113f2e5..3b974e25103ad4bfc40604a02a96920a5d6571fa 100644 --- a/llvm/tools/llvm-profgen/llvm-profgen.cpp +++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Support/VirtualFileSystem.h" static cl::OptionCategory ProfGenCategory("ProfGen Options"); @@ -157,7 +158,9 @@ int main(int argc, const char *argv[]) { if (SampleProfFilename.getNumOccurrences()) { LLVMContext Context; - auto ReaderOrErr = SampleProfileReader::create(SampleProfFilename, Context); + auto FS = vfs::getRealFileSystem(); + auto ReaderOrErr = + SampleProfileReader::create(SampleProfFilename, Context, *FS); std::unique_ptr Reader = std::move(ReaderOrErr.get()); Reader->read(); diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index 
a8db0c62898ee7ce3428362b3a7225a7859b9ce6..bb9711e7aa65abfb00e66f97363c22d4d58a1098 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -31,6 +31,7 @@ #include "llvm/Passes/StandardInstrumentations.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" @@ -333,22 +334,25 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, bool EnableDebugify, bool VerifyDIPreserve) { bool VerifyEachPass = VK == VK_VerifyEachPass; + auto FS = vfs::getRealFileSystem(); std::optional P; switch (PGOKindFlag) { case InstrGen: - P = PGOOptions(ProfileFile, "", "", PGOOptions::IRInstr); + P = PGOOptions(ProfileFile, "", "", FS, PGOOptions::IRInstr); break; case InstrUse: - P = PGOOptions(ProfileFile, "", ProfileRemappingFile, PGOOptions::IRUse); + P = PGOOptions(ProfileFile, "", ProfileRemappingFile, FS, + PGOOptions::IRUse); break; case SampleUse: - P = PGOOptions(ProfileFile, "", ProfileRemappingFile, + P = PGOOptions(ProfileFile, "", ProfileRemappingFile, FS, PGOOptions::SampleUse); break; case NoPGO: if (DebugInfoForProfiling || PseudoProbeForProfiling) - P = PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction, - DebugInfoForProfiling, PseudoProbeForProfiling); + P = PGOOptions("", "", "", nullptr, PGOOptions::NoAction, + PGOOptions::NoCSAction, DebugInfoForProfiling, + PseudoProbeForProfiling); else P = std::nullopt; } @@ -367,7 +371,7 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, P->CSAction = PGOOptions::CSIRInstr; P->CSProfileGenFile = CSProfileGenFile; } else - P = PGOOptions("", CSProfileGenFile, ProfileRemappingFile, + P = PGOOptions("", CSProfileGenFile, ProfileRemappingFile, FS, PGOOptions::NoAction, PGOOptions::CSIRInstr); } else /* CSPGOKindFlag == CSInstrUse */ { if (!P) { 
diff --git a/llvm/unittests/ProfileData/SampleProfTest.cpp b/llvm/unittests/ProfileData/SampleProfTest.cpp index 92925c7e9e683abee888a64e41f57e7695c0387e..bd34b05336e19e60a0a675e05c50c4593130625e 100644 --- a/llvm/unittests/ProfileData/SampleProfTest.cpp +++ b/llvm/unittests/ProfileData/SampleProfTest.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Testing/Support/SupportHelpers.h" #include "gtest/gtest.h" @@ -57,8 +58,9 @@ struct SampleProfTest : ::testing::Test { void readProfile(const Module &M, StringRef Profile, StringRef RemapFile = "") { + auto FS = vfs::getRealFileSystem(); auto ReaderOrErr = SampleProfileReader::create( - std::string(Profile), Context, FSDiscriminatorPass::Base, + std::string(Profile), Context, *FS, FSDiscriminatorPass::Base, std::string(RemapFile)); ASSERT_TRUE(NoError(ReaderOrErr.getError())); Reader = std::move(ReaderOrErr.get()); diff --git a/llvm/unittests/tools/CMakeLists.txt b/llvm/unittests/tools/CMakeLists.txt index 7ef64f1180a00e77dd15d686a106191edc73ba18..e032113fa77e6a0892c0edc0490cc704855bd008 100644 --- a/llvm/unittests/tools/CMakeLists.txt +++ b/llvm/unittests/tools/CMakeLists.txt @@ -7,5 +7,6 @@ endif() add_subdirectory( llvm-exegesis ) +add_subdirectory(llvm-profdata) add_subdirectory(llvm-profgen) add_subdirectory(llvm-mca) diff --git a/llvm/unittests/tools/llvm-profdata/CMakeLists.txt b/llvm/unittests/tools/llvm-profdata/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..dab1ac523ed00269dc0b6079f5daf66f25d52b67 --- /dev/null +++ b/llvm/unittests/tools/llvm-profdata/CMakeLists.txt @@ -0,0 +1,12 @@ +set(LLVM_LINK_COMPONENTS + ProfileData + Support + ) + +add_llvm_unittest(LLVMProfdataTests + OutputSizeLimitTest.cpp + ) + +target_link_libraries(LLVMProfdataTests PRIVATE LLVMTestingSupport) + +set_property(TARGET 
LLVMProfdataTests PROPERTY FOLDER "Tests/UnitTests/ToolTests") diff --git a/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp b/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c4f82483589690ede7a70ebc14a065c3b9b19680 --- /dev/null +++ b/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp @@ -0,0 +1,182 @@ +//===- llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/ProfileData/SampleProfWriter.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/VirtualFileSystem.h" +#include "llvm/Testing/Support/Error.h" +#include "gtest/gtest.h" + +using namespace llvm; +using llvm::unittest::TempFile; + +std::string Input1 = R"(main:184019:0 + 4: 534 + 4.2: 534 + 5: 1075 + 5.1: 1075 + 6: 2080 + 7: 534 + 9: 2064 _Z3bari:1471 _Z3fooi:631 + 10: inline1:1000 + 1: 1000 + 10: inline2:2000 + 1: 2000 +_Z3bari:20301:1437 + 1: 1437 +_Z3fooi:7711:610 + 1: 610)"; + +const char EmptyProfile[18] = "\xff\xe5\xd0\xb1\xf4\xc9\x94\xa8\x53\x67"; + +/// sys::fs and SampleProf mix Error and error_code, making an adapter class +/// to keep code elegant. 
+template <typename T> class ExpectedErrorOr : public Expected<T> {
+public:
+  /// Success state. \p Obj is moved (not copied) into the base Expected<T>.
+  ExpectedErrorOr(T &&Obj) : Expected<T>(std::move(Obj)) {}
+
+  /// Failure state built from a std::error_code.
+  ExpectedErrorOr(std::error_code EC) : Expected<T>(errorCodeToError(EC)) {}
+
+  /// Failure state built from an llvm::Error.
+  ExpectedErrorOr(Error &&E) : Expected<T>(std::move(E)) {}
+
+  /// Propagates only the error held by \p E; any value in \p E is ignored.
+  /// The helper macros below call this solely on their failure path, i.e.
+  /// after `!E` has already been observed.
+  template <typename U>
+  ExpectedErrorOr(ErrorOr<U> &&E)
+      : Expected<T>(errorCodeToError(E.getError())) {}
+
+  /// Same as the ErrorOr<U> overload, for an Expected<U> source.
+  template <typename U>
+  ExpectedErrorOr(Expected<U> &&E) : Expected<T>(E.takeError()) {}
+};
+
+/// Evaluates \p Value into a new local `<Var>OrErr`. If it tests false, returns
+/// it from the enclosing function; otherwise moves the contained value into a
+/// new local named \p Var.
+/// Expands to several statements and declares two names, so it must be used as
+/// a full statement inside a braced scope.
+#define DEF_VAR_RETURN_IF_ERROR(Var, Value)                                   \
+  auto Var##OrErr = Value;                                                    \
+  if (!Var##OrErr)                                                            \
+    return Var##OrErr;                                                        \
+  auto Var = std::move(Var##OrErr.get())
+
+/// Like DEF_VAR_RETURN_IF_ERROR, but assigns to the already declared
+/// `<Var>OrErr` and \p Var instead of declaring them.
+#define VAR_RETURN_IF_ERROR(Var, Value)                                       \
+  Var##OrErr = Value;                                                         \
+  if (!Var##OrErr)                                                            \
+    return Var##OrErr;                                                        \
+  Var = std::move(Var##OrErr.get())
+
+/// Returns \p Value from the enclosing function if it converts to true in a
+/// condition. \p Value must therefore be true on failure and be accepted by
+/// one of the ExpectedErrorOr constructors.
+#define RETURN_IF_ERROR(Value)                                                \
+  if (auto E = Value)                                                         \
+  return std::move(E)
+
+/// The main testing routine. After rewriting profiles with size limit, check
+/// the following:
+/// 1. The file size of the new profile is within the size limit.
+/// 2. The new profile is a subset of the old profile, and the content of every
+/// sample in the new profile is unchanged.
+/// Note that even though by default samples with fewest total count are dropped
+/// first, this is not a requirement. Samples can be dropped in any order.
+///
+/// \returns a success value (a null placeholder pointer) when every read and
+/// write step succeeded, or the first error encountered. Violations of checks
+/// 1 and 2 are reported through gtest EXPECT_* and do not affect the return
+/// value.
+static ExpectedErrorOr<void *> RunTest(StringRef Input, size_t SizeLimit,
+                                       SampleProfileFormat Format) {
+  // Read Input profile.
+  auto FS = vfs::getRealFileSystem();
+  LLVMContext Context;
+  auto InputBuffer = MemoryBuffer::getMemBuffer(Input);
+  DEF_VAR_RETURN_IF_ERROR(
+      Reader, SampleProfileReader::create(InputBuffer, Context, *FS));
+  RETURN_IF_ERROR(Reader->read());
+  SampleProfileMap OldProfiles = Reader->getProfiles();
+
+  // Rewrite it to a temp file with size limit.
+  TempFile Temp("profile", "afdo", "", true);
+  bool isEmpty = false;
+  {
+    // Scoped so the writer is destroyed before the temp file is read back
+    // (presumably this is what flushes its output).
+    DEF_VAR_RETURN_IF_ERROR(Writer,
+                            SampleProfileWriter::create(Temp.path(), Format));
+    std::error_code EC = Writer->writeWithSizeLimit(OldProfiles, SizeLimit);
+    // too_large means no sample could be written because SizeLimit is too
+    // small. Otherwise any other error code indicates unexpected failure.
+    if (EC == sampleprof_error::too_large)
+      isEmpty = true;
+    else if (EC)
+      return EC;
+  }
+
+  // Read the temp file to get new profiles. Use the default empty profile if
+  // temp file was not written because size limit is too small.
+  SampleProfileMap NewProfiles;
+  InputBuffer = MemoryBuffer::getMemBuffer(StringRef(EmptyProfile, 17));
+  DEF_VAR_RETURN_IF_ERROR(
+      NewReader, SampleProfileReader::create(InputBuffer, Context, *FS));
+  if (!isEmpty) {
+    VAR_RETURN_IF_ERROR(NewReader, SampleProfileReader::create(
+                                       Temp.path().str(), Context, *FS));
+    RETURN_IF_ERROR(NewReader->read());
+    NewProfiles = NewReader->getProfiles();
+  }
+
+  // Check temp file is actually within size limit.
+  uint64_t FileSize;
+  RETURN_IF_ERROR(sys::fs::file_size(Temp.path(), FileSize));
+  EXPECT_LE(FileSize, SizeLimit);
+
+  // For compact binary format, function names are stored as MD5, so we cannot
+  // directly match the samples of the new profile with the old profile. A
+  // simple way is to convert the old profile to compact binary format and read
+  // it back.
+  if (Format == llvm::sampleprof::SPF_Compact_Binary) {
+    TempFile CompBinary("compbinary", "afdo", "", true);
+    {
+      DEF_VAR_RETURN_IF_ERROR(
+          Writer, SampleProfileWriter::create(
+                      CompBinary.path(), llvm::sampleprof::SPF_Compact_Binary));
+      RETURN_IF_ERROR(Writer->write(OldProfiles));
+    }
+    VAR_RETURN_IF_ERROR(Reader, SampleProfileReader::create(
+                                    CompBinary.path().str(), Context, *FS));
+    RETURN_IF_ERROR(Reader->read());
+    OldProfiles = Reader->getProfiles();
+  }
+
+  // For every sample in the new profile, confirm it is in the old profile and
+  // unchanged. Bind by const reference: the entries are only inspected, so
+  // there is no reason to copy each key/profile pair.
+  for (const auto &Sample : NewProfiles) {
+    auto FindResult = OldProfiles.find(Sample.first);
+    EXPECT_NE(FindResult, OldProfiles.end());
+    if (FindResult != OldProfiles.end()) {
+      EXPECT_EQ(Sample.second.getHeadSamples(),
+                FindResult->second.getHeadSamples());
+      EXPECT_EQ(Sample.second, FindResult->second);
+    }
+  }
+  return nullptr;
+}
+
+// Each test below runs RunTest on Input1 for one output format over a list of
+// size limits and requires every run to return success.
+
+TEST(TestOutputSizeLimit, TestOutputSizeLimitExtBinary) {
+  for (size_t OutputSizeLimit : {490, 489, 488, 475, 474, 459, 400})
+    ASSERT_THAT_EXPECTED(
+        RunTest(Input1, OutputSizeLimit, llvm::sampleprof::SPF_Ext_Binary),
+        Succeeded());
+}
+
+TEST(TestOutputSizeLimit, TestOutputSizeLimitBinary) {
+  for (size_t OutputSizeLimit : {250, 249, 248, 237, 236, 223, 200})
+    ASSERT_THAT_EXPECTED(
+        RunTest(Input1, OutputSizeLimit, llvm::sampleprof::SPF_Binary),
+        Succeeded());
+}
+
+TEST(TestOutputSizeLimit, TestOutputSizeLimitCompBinary) {
+  for (size_t OutputSizeLimit : {277, 276, 275, 264, 263, 250, 200})
+    ASSERT_THAT_EXPECTED(
+        RunTest(Input1, OutputSizeLimit, llvm::sampleprof::SPF_Compact_Binary),
+        Succeeded());
+}
+
+TEST(TestOutputSizeLimit, TestOutputSizeLimitText) {
+  for (size_t OutputSizeLimit :
+       {229, 228, 227, 213, 212, 211, 189, 188, 187, 186, 150})
+    ASSERT_THAT_EXPECTED(
+        RunTest(Input1, OutputSizeLimit, llvm::sampleprof::SPF_Text),
+        Succeeded());
+}