From 2c76d2ad544909148e67ff5b39da516a0d5efb25 Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Mon, 27 Mar 2023 11:40:52 -0700 Subject: [PATCH 01/13] [CSSPGO][Preinliner] Trim cold call edges of the profiled call graph for a more stable profile generation. I've noticed that for some services the CSSPGO profile is less stable than the non-CS AutoFDO profile from profiling to profiling without source changes. This is manifested by comparing profile similarities. For example, in my experiments, AutoFDO profiles are always 99+% similar over the same binary but different inputs (very close dynamic traffic) while CSSPGO profile similarity is around 90%. The main source of the profile instability is the top-down order computed on the profiled call graph in the llvm-profgen CS preinliner. The top-down order is used to guide the CS preinliner to pre-compute an inline decision that is later on fulfilled by the compiler. A subtle change in the top-down order from run to run could cause a different inline decision to be computed. A deeper look into the divergence of the top-down order revealed that: - The topological sorting inside one SCC isn't quite right. This is fixed by {D130717}. - The profiled call graphs of the two sides of the A/B run aren't 100% the same. The call edges in the two runs do not subsume each other, and edges that appear in both graphs may not have exactly the same weight. This is due to the dynamic nature of the graphs. However, I saw that the graphs can be made closer by removing the cold edges from them, and this bumped up the CSSPGO profile stability to the same level as the AutoFDO profile. Removing cold call edges from the dynamic call graph may have an impact on cold inlining, but so far I haven't seen any performance issues since the CS preinliner mainly targets hot callsites, and cold inlining can always be done by the compiler CGSCC inliner. 
Also fixing an issue where the largest weight instead of the accumulated weight for a call edge is used in the profiled call graph. Reviewed By: wenlei Differential Revision: https://reviews.llvm.org/D147013 --- .../llvm/Transforms/IPO/ProfiledCallGraph.h | 38 +++++++++++++++++-- llvm/tools/llvm-profgen/CSPreInliner.cpp | 7 +++- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h index 5e12fcfeae1b..bc8360a80bc0 100644 --- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h +++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h @@ -64,16 +64,22 @@ public: using iterator = ProfiledCallGraphNode::iterator; // Constructor for non-CS profile. - ProfiledCallGraph(SampleProfileMap &ProfileMap) { + ProfiledCallGraph(SampleProfileMap &ProfileMap, + uint64_t IgnoreColdCallThreshold = 0) { assert(!FunctionSamples::ProfileIsCS && "CS flat profile is not handled here"); for (const auto &Samples : ProfileMap) { addProfiledCalls(Samples.second); } + + // Trim edges with weight up to `IgnoreColdCallThreshold`. This aims + // for a more stable call graph with "determinstic" edges from run to run. + trimColdEges(IgnoreColdCallThreshold); } // Constructor for CS profile. - ProfiledCallGraph(SampleContextTracker &ContextTracker) { + ProfiledCallGraph(SampleContextTracker &ContextTracker, + uint64_t IgnoreColdCallThreshold = 0) { // BFS traverse the context profile trie to add call edges for calls shown // in context. std::queue Queue; @@ -121,11 +127,16 @@ public: ContextTracker.getFuncNameFor(Callee), Weight); } } + + // Trim edges with weight up to `IgnoreColdCallThreshold`. This aims + // for a more stable call graph with "determinstic" edges from run to run. 
+ trimColdEges(IgnoreColdCallThreshold); } iterator begin() { return Root.Edges.begin(); } iterator end() { return Root.Edges.end(); } ProfiledCallGraphNode *getEntryNode() { return &Root; } + void addProfiledFunction(StringRef Name) { if (!ProfiledFunctions.count(Name)) { // Link to synthetic root to make sure every node is reachable @@ -148,8 +159,9 @@ private: auto EdgeIt = Edges.find(Edge); if (EdgeIt == Edges.end()) { Edges.insert(Edge); - } else if (EdgeIt->Weight < Edge.Weight) { - // Replace existing call edges with same target but smaller weight. + } else { + // Accumulate weight to the existing edge. + Edge.Weight += EdgeIt->Weight; Edges.erase(EdgeIt); Edges.insert(Edge); } @@ -175,6 +187,24 @@ private: } } + // Trim edges with weight up to `Threshold`. Do not trim anything if + // `Threshold` is zero. + void trimColdEges(uint64_t Threshold = 0) { + if (!Threshold) + return; + + for (auto &Node : ProfiledFunctions) { + auto &Edges = Node.second.Edges; + auto I = Edges.begin(); + while (I != Edges.end()) { + if (I->Weight <= Threshold) + I = Edges.erase(I); + else + I++; + } + } + } + ProfiledCallGraphNode Root; StringMap ProfiledFunctions; }; diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp index dbc5bc7327d5..d10cf4c895b2 100644 --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp @@ -74,7 +74,12 @@ CSPreInliner::CSPreInliner(SampleContextTracker &Tracker, std::vector CSPreInliner::buildTopDownOrder() { std::vector Order; - ProfiledCallGraph ProfiledCG(ContextTracker); + // Trim cold edges to get a more stable call graph. This allows for a more + // stable top-down order which in turns helps the stablity of the generated + // profile from run to run. 
+ uint64_t ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( + (Summary->getDetailedSummary())); + ProfiledCallGraph ProfiledCG(ContextTracker, ColdCountThreshold); // Now that we have a profiled call graph, construct top-down order // by building up SCC and reversing SCC order. -- Gitee From c76e51fcec928f421786300ec3593c00ee296511 Mon Sep 17 00:00:00 2001 From: William Huang Date: Wed, 8 Feb 2023 02:59:12 +0000 Subject: [PATCH 02/13] [llvm-profdata] Add option to cap profile output size D139603 (add option to llvm-profdata to reduce output profile size) contains test cases that are not cross-platform. Moving those tests to unit test and making sure the feature is callable from llvm library Reviewed By: snehasish Differential Revision: https://reviews.llvm.org/D141446 --- llvm/include/llvm/ProfileData/SampleProf.h | 23 +++ .../llvm/ProfileData/SampleProfWriter.h | 71 +++++++ llvm/lib/ProfileData/SampleProfWriter.cpp | 152 ++++++++++++--- llvm/tools/llvm-profdata/llvm-profdata.cpp | 15 +- llvm/unittests/tools/CMakeLists.txt | 1 + .../tools/llvm-profdata/CMakeLists.txt | 12 ++ .../llvm-profdata/OutputSizeLimitTest.cpp | 182 ++++++++++++++++++ 7 files changed, 430 insertions(+), 26 deletions(-) create mode 100644 llvm/unittests/tools/llvm-profdata/CMakeLists.txt create mode 100644 llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 13f0157222ec..faee9639ea86 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -427,6 +427,14 @@ public: void print(raw_ostream &OS, unsigned Indent) const; void dump() const; + bool operator==(const SampleRecord &Other) const { + return NumSamples == Other.NumSamples && CallTargets == Other.CallTargets; + } + + bool operator!=(const SampleRecord &Other) const { + return !(*this == Other); + } + private: uint64_t NumSamples = 0; CallTargetMap CallTargets; 
@@ -1149,6 +1157,21 @@ public: // all the inline instances and names of call targets. void findAllNames(DenseSet &NameSet) const; + bool operator==(const FunctionSamples &Other) const { + return (GUIDToFuncNameMap == Other.GUIDToFuncNameMap || + (GUIDToFuncNameMap && Other.GUIDToFuncNameMap && + *GUIDToFuncNameMap == *Other.GUIDToFuncNameMap)) && + FunctionHash == Other.FunctionHash && Context == Other.Context && + TotalSamples == Other.TotalSamples && + TotalHeadSamples == Other.TotalHeadSamples && + BodySamples == Other.BodySamples && + CallsiteSamples == Other.CallsiteSamples; + } + + bool operator!=(const FunctionSamples &Other) const { + return !(*this == Other); + } + private: /// CFG hash value for the function. uint64_t FunctionHash = 0; diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h index b1ed0335e9c9..4edb3b049e70 100644 --- a/llvm/include/llvm/ProfileData/SampleProfWriter.h +++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -35,6 +35,56 @@ enum SectionLayout { NumOfLayout, }; +/// When writing a profile with size limit, user may want to use a different +/// strategy to reduce function count other than dropping functions with fewest +/// samples first. In this case a class implementing the same interfaces should +/// be provided to SampleProfileWriter::writeWithSizeLimit(). +class FunctionPruningStrategy { +protected: + SampleProfileMap &ProfileMap; + size_t OutputSizeLimit; + +public: + /// \p ProfileMap A reference to the original profile map. It will be modified + /// by Erase(). + /// \p OutputSizeLimit Size limit in bytes of the output profile. This is + /// necessary to estimate how many functions to remove. 
+ FunctionPruningStrategy(SampleProfileMap &ProfileMap, size_t OutputSizeLimit) + : ProfileMap(ProfileMap), OutputSizeLimit(OutputSizeLimit) {} + + virtual ~FunctionPruningStrategy() = default; + + /// SampleProfileWriter::writeWithSizeLimit() calls this after every write + /// iteration if the output size still exceeds the limit. This function + /// should erase some functions from the profile map so that the writer tries + /// to write the profile again with fewer functions. At least 1 entry from the + /// profile map must be erased. + /// + /// \p CurrentOutputSize Number of bytes in the output if current profile map + /// is written. + virtual void Erase(size_t CurrentOutputSize) = 0; +}; + +class DefaultFunctionPruningStrategy : public FunctionPruningStrategy { + std::vector SortedFunctions; + +public: + DefaultFunctionPruningStrategy(SampleProfileMap &ProfileMap, + size_t OutputSizeLimit); + + /// In this default implementation, functions with fewest samples are dropped + /// first. Since the exact size of the output cannot be easily calculated due + /// to compression, we use a heuristic to remove as many functions as + /// necessary but not too many, aiming to minimize the number of write + /// iterations. + /// Empirically, functions with larger total sample count contain linearly + /// more sample entries, meaning it takes linearly more space to write them. + /// The cumulative length is therefore quadratic if all functions are sorted + /// by total sample count. + /// TODO: Find better heuristic. + void Erase(size_t CurrentOutputSize) override; +}; + /// Sample-based profile writer. Base class. class SampleProfileWriter { public: @@ -50,6 +100,17 @@ public: /// \returns status code of the file update operation. virtual std::error_code write(const SampleProfileMap &ProfileMap); + /// Write sample profiles up to given size limit, using the pruning strategy + /// to drop some functions if necessary. 
+ /// + /// \returns status code of the file update operation. + template + std::error_code writeWithSizeLimit(SampleProfileMap &ProfileMap, + size_t OutputSizeLimit) { + FunctionPruningStrategy Strategy(ProfileMap, OutputSizeLimit); + return writeWithSizeLimitInternal(ProfileMap, OutputSizeLimit, &Strategy); + } + raw_ostream &getOutputStream() { return *OutputStream; } /// Profile writer factory. @@ -79,6 +140,15 @@ protected: // Write function profiles to the profile file. virtual std::error_code writeFuncProfiles(const SampleProfileMap &ProfileMap); + std::error_code writeWithSizeLimitInternal(SampleProfileMap &ProfileMap, + size_t OutputSizeLimit, + FunctionPruningStrategy *Strategy); + + /// For writeWithSizeLimit in text mode, each newline takes 1 additional byte + /// on Windows when actually written to the file, but not written to a memory + /// buffer. This needs to be accounted for when rewriting the profile. + size_t LineCount; + /// Output stream where to emit the profile to. std::unique_ptr OutputStream; @@ -102,6 +172,7 @@ protected: : SampleProfileWriter(OS), Indent(0) {} std::error_code writeHeader(const SampleProfileMap &ProfileMap) override { + LineCount = 0; return sampleprof_error::success; } diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp index 093790afe2d6..bce858a99a81 100644 --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -30,6 +30,7 @@ #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include @@ -37,9 +38,109 @@ #include #include +#define DEBUG_TYPE "llvm-profdata" + using namespace llvm; using namespace sampleprof; +namespace llvm { +namespace support { +namespace endian { +namespace { + +// Adapter class to llvm::support::endian::Writer for pwrite(). 
+struct SeekableWriter { + raw_pwrite_stream &OS; + endianness Endian; + SeekableWriter(raw_pwrite_stream &OS, endianness Endian) + : OS(OS), Endian(Endian) {} + + template + void pwrite(ValueType Val, size_t Offset) { + std::string StringBuf; + raw_string_ostream SStream(StringBuf); + Writer(SStream, Endian).write(Val); + OS.pwrite(StringBuf.data(), StringBuf.size(), Offset); + } +}; + +} // namespace +} // namespace endian +} // namespace support +} // namespace llvm + +DefaultFunctionPruningStrategy::DefaultFunctionPruningStrategy( + SampleProfileMap &ProfileMap, size_t OutputSizeLimit) + : FunctionPruningStrategy(ProfileMap, OutputSizeLimit) { + sortFuncProfiles(ProfileMap, SortedFunctions); +} + +void DefaultFunctionPruningStrategy::Erase(size_t CurrentOutputSize) { + double D = (double)OutputSizeLimit / CurrentOutputSize; + size_t NewSize = (size_t)round(ProfileMap.size() * D * D); + size_t NumToRemove = ProfileMap.size() - NewSize; + if (NumToRemove < 1) + NumToRemove = 1; + + assert(NumToRemove <= SortedFunctions.size()); + llvm::for_each( + llvm::make_range(SortedFunctions.begin() + SortedFunctions.size() - + NumToRemove, + SortedFunctions.end()), + [&](const NameFunctionSamples &E) { ProfileMap.erase(E.first); }); + SortedFunctions.resize(SortedFunctions.size() - NumToRemove); +} + +std::error_code SampleProfileWriter::writeWithSizeLimitInternal( + SampleProfileMap &ProfileMap, size_t OutputSizeLimit, + FunctionPruningStrategy *Strategy) { + if (OutputSizeLimit == 0) + return write(ProfileMap); + + size_t OriginalFunctionCount = ProfileMap.size(); + + std::unique_ptr OriginalOutputStream; + OutputStream.swap(OriginalOutputStream); + + size_t IterationCount = 0; + size_t TotalSize; + + SmallVector StringBuffer; + do { + StringBuffer.clear(); + OutputStream.reset(new raw_svector_ostream(StringBuffer)); + if (std::error_code EC = write(ProfileMap)) + return EC; + + TotalSize = StringBuffer.size(); + // On Windows every "\n" is actually written as "\r\n" to 
disk but not to + // memory buffer, this difference should be added when considering the total + // output size. +#ifdef _WIN32 + if (Format == SPF_Text) + TotalSize += LineCount; +#endif + if (TotalSize <= OutputSizeLimit) + break; + + Strategy->Erase(TotalSize); + IterationCount++; + } while (ProfileMap.size() != 0); + + if (ProfileMap.size() == 0) + return sampleprof_error::too_large; + + OutputStream.swap(OriginalOutputStream); + OutputStream->write(StringBuffer.data(), StringBuffer.size()); + LLVM_DEBUG(dbgs() << "Profile originally has " << OriginalFunctionCount + << " functions, reduced to " << ProfileMap.size() << " in " + << IterationCount << " iterations\n"); + // Silence warning on Release build. + (void)OriginalFunctionCount; + (void)IterationCount; + return sampleprof_error::success; +} + std::error_code SampleProfileWriter::writeFuncProfiles(const SampleProfileMap &ProfileMap) { std::vector V; @@ -116,6 +217,12 @@ std::error_code SampleProfileWriterExtBinaryBase::addNewSection( std::error_code SampleProfileWriterExtBinaryBase::write(const SampleProfileMap &ProfileMap) { + // When calling write on a different profile map, existing states should be + // cleared. 
+ NameTable.clear(); + CSNameTable.clear(); + SecHdrTable.clear(); + if (std::error_code EC = writeHeader(ProfileMap)) return EC; @@ -477,6 +584,7 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) { if (Indent == 0) OS << ":" << S.getHeadSamples(); OS << "\n"; + LineCount++; SampleSorter SortedSamples(S.getBodySamples()); for (const auto &I : SortedSamples.get()) { @@ -493,6 +601,7 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) { for (const auto &J : Sample.getSortedCallTargets()) OS << " " << J.first << ":" << J.second; OS << "\n"; + LineCount++; } SampleSorter SortedCallsiteSamples( @@ -515,11 +624,13 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) { if (FunctionSamples::ProfileIsProbeBased) { OS.indent(Indent + 1); OS << "!CFGChecksum: " << S.getFunctionHash() << "\n"; + LineCount++; } if (S.getContext().getAllAttributes()) { OS.indent(Indent + 1); OS << "!Attributes: " << S.getContext().getAllAttributes() << "\n"; + LineCount++; } return sampleprof_error::success; @@ -605,14 +716,10 @@ std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() { auto &OS = *OutputStream; // Fill the slot remembered by TableOffset with the offset of FuncOffsetTable. - auto &OFS = static_cast(OS); uint64_t FuncOffsetTableStart = OS.tell(); - if (OFS.seek(TableOffset) == (uint64_t)-1) - return sampleprof_error::ostream_seek_unsupported; - support::endian::Writer Writer(*OutputStream, support::little); - Writer.write(FuncOffsetTableStart); - if (OFS.seek(FuncOffsetTableStart) == (uint64_t)-1) - return sampleprof_error::ostream_seek_unsupported; + support::endian::SeekableWriter Writer(static_cast(OS), + support::little); + Writer.pwrite(FuncOffsetTableStart, TableOffset); // Write out the table size. 
encodeULEB128(FuncOffsetTable.size(), OS); @@ -623,6 +730,7 @@ std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() { return EC; encodeULEB128(Entry.second, OS); } + FuncOffsetTable.clear(); return sampleprof_error::success; } @@ -650,6 +758,10 @@ SampleProfileWriterBinary::writeMagicIdent(SampleProfileFormat Format) { std::error_code SampleProfileWriterBinary::writeHeader(const SampleProfileMap &ProfileMap) { + // When calling write on a different profile map, existing names should be + // cleared. + NameTable.clear(); + writeMagicIdent(Format); computeSummary(ProfileMap); @@ -690,14 +802,6 @@ void SampleProfileWriterExtBinaryBase::allocSecHdrTable() { } std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() { - auto &OFS = static_cast(*OutputStream); - uint64_t Saved = OutputStream->tell(); - - // Set OutputStream to the location saved in SecHdrTableOffset. - if (OFS.seek(SecHdrTableOffset) == (uint64_t)-1) - return sampleprof_error::ostream_seek_unsupported; - support::endian::Writer Writer(*OutputStream, support::little); - assert(SecHdrTable.size() == SectionHdrLayout.size() && "SecHdrTable entries doesn't match SectionHdrLayout"); SmallVector IndexMap(SecHdrTable.size(), -1); @@ -714,21 +818,23 @@ std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() { // needs to be computed after SecLBRProfile (the order in SecHdrTable), // but it needs to be read before SecLBRProfile (the order in // SectionHdrLayout). So we use IndexMap above to switch the order. 
+ support::endian::SeekableWriter Writer( + static_cast(*OutputStream), support::little); for (uint32_t LayoutIdx = 0; LayoutIdx < SectionHdrLayout.size(); LayoutIdx++) { assert(IndexMap[LayoutIdx] < SecHdrTable.size() && "Incorrect LayoutIdx in SecHdrTable"); auto Entry = SecHdrTable[IndexMap[LayoutIdx]]; - Writer.write(static_cast(Entry.Type)); - Writer.write(static_cast(Entry.Flags)); - Writer.write(static_cast(Entry.Offset)); - Writer.write(static_cast(Entry.Size)); + Writer.pwrite(static_cast(Entry.Type), + SecHdrTableOffset + 4 * LayoutIdx * sizeof(uint64_t)); + Writer.pwrite(static_cast(Entry.Flags), + SecHdrTableOffset + (4 * LayoutIdx + 1) * sizeof(uint64_t)); + Writer.pwrite(static_cast(Entry.Offset), + SecHdrTableOffset + (4 * LayoutIdx + 2) * sizeof(uint64_t)); + Writer.pwrite(static_cast(Entry.Size), + SecHdrTableOffset + (4 * LayoutIdx + 3) * sizeof(uint64_t)); } - // Reset OutputStream. - if (OFS.seek(Saved) == (uint64_t)-1) - return sampleprof_error::ostream_seek_unsupported; - return sampleprof_error::success; } diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index c8e5e6d1ad68..eafeb299c52d 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -966,7 +966,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, bool UseMD5, bool GenPartialProfile, bool GenCSNestedProfile, bool SampleMergeColdContext, bool SampleTrimColdContext, bool SampleColdContextFrameDepth, FailureMode FailMode, - bool DropProfileSymbolList) { + bool DropProfileSymbolList, size_t OutputSizeLimit) { using namespace sampleprof; SampleProfileMap ProfileMap; SmallVector, 5> Readers; @@ -1059,7 +1059,10 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, auto Buffer = getInputFileBuf(ProfileSymbolListFile); handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList, CompressAllSections, UseMD5, 
GenPartialProfile); - if (std::error_code EC = Writer->write(ProfileMap)) + + // If OutputSizeLimit is 0 (default), it is the same as write(). + if (std::error_code EC = + Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit)) exitWithErrorCode(std::move(EC)); } @@ -1202,6 +1205,11 @@ static int merge_main(int argc, const char *argv[]) { "sample-frame-depth-for-cold-context", cl::init(1), cl::desc("Keep the last K frames while merging cold profile. 1 means the " "context-less base profile")); + cl::opt OutputSizeLimit( + "output-size-limit", cl::init(0), cl::Hidden, + cl::desc("Trim cold functions until profile size is below specified " + "limit in bytes. This uses a heursitic and functions may be " + "excessively trimmed")); cl::opt GenPartialProfile( "gen-partial-profile", cl::init(false), cl::Hidden, cl::desc("Generate a partial profile (only meaningful for -extbinary)")); @@ -1288,7 +1296,8 @@ static int merge_main(int argc, const char *argv[]) { WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, ProfileSymbolListFile, CompressAllSections, UseMD5, GenPartialProfile, GenCSNestedProfile, SampleMergeColdContext, SampleTrimColdContext, - SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList); + SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList, + OutputSizeLimit); return 0; } diff --git a/llvm/unittests/tools/CMakeLists.txt b/llvm/unittests/tools/CMakeLists.txt index 7ef64f1180a0..e032113fa77e 100644 --- a/llvm/unittests/tools/CMakeLists.txt +++ b/llvm/unittests/tools/CMakeLists.txt @@ -7,5 +7,6 @@ endif() add_subdirectory( llvm-exegesis ) +add_subdirectory(llvm-profdata) add_subdirectory(llvm-profgen) add_subdirectory(llvm-mca) diff --git a/llvm/unittests/tools/llvm-profdata/CMakeLists.txt b/llvm/unittests/tools/llvm-profdata/CMakeLists.txt new file mode 100644 index 000000000000..dab1ac523ed0 --- /dev/null +++ b/llvm/unittests/tools/llvm-profdata/CMakeLists.txt @@ -0,0 +1,12 @@ +set(LLVM_LINK_COMPONENTS + ProfileData + Support 
+ ) + +add_llvm_unittest(LLVMProfdataTests + OutputSizeLimitTest.cpp + ) + +target_link_libraries(LLVMProfdataTests PRIVATE LLVMTestingSupport) + +set_property(TARGET LLVMProfdataTests PROPERTY FOLDER "Tests/UnitTests/ToolTests") diff --git a/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp b/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp new file mode 100644 index 000000000000..c4f824835896 --- /dev/null +++ b/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp @@ -0,0 +1,182 @@ +//===- llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/ProfileData/SampleProfWriter.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/VirtualFileSystem.h" +#include "llvm/Testing/Support/Error.h" +#include "gtest/gtest.h" + +using namespace llvm; +using llvm::unittest::TempFile; + +std::string Input1 = R"(main:184019:0 + 4: 534 + 4.2: 534 + 5: 1075 + 5.1: 1075 + 6: 2080 + 7: 534 + 9: 2064 _Z3bari:1471 _Z3fooi:631 + 10: inline1:1000 + 1: 1000 + 10: inline2:2000 + 1: 2000 +_Z3bari:20301:1437 + 1: 1437 +_Z3fooi:7711:610 + 1: 610)"; + +const char EmptyProfile[18] = "\xff\xe5\xd0\xb1\xf4\xc9\x94\xa8\x53\x67"; + +/// sys::fs and SampleProf mix Error and error_code, making an adapter class +/// to keep code elegant. 
+template class ExpectedErrorOr : public Expected { +public: + ExpectedErrorOr(T &&Obj) : Expected(Obj) {} + + ExpectedErrorOr(std::error_code EC) : Expected(errorCodeToError(EC)) {} + + ExpectedErrorOr(Error &&E) : Expected(std::move(E)) {} + + template + ExpectedErrorOr(ErrorOr &&E) + : Expected(errorCodeToError(E.getError())) {} + + template + ExpectedErrorOr(Expected &&E) : Expected(E.takeError()) {} +}; + +#define DEF_VAR_RETURN_IF_ERROR(Var, Value) \ + auto Var##OrErr = Value; \ + if (!Var##OrErr) \ + return Var##OrErr; \ + auto Var = std::move(Var##OrErr.get()) + +#define VAR_RETURN_IF_ERROR(Var, Value) \ + Var##OrErr = Value; \ + if (!Var##OrErr) \ + return Var##OrErr; \ + Var = std::move(Var##OrErr.get()) + +#define RETURN_IF_ERROR(Value) \ + if (auto E = Value) \ + return std::move(E) + +/// The main testing routine. After rewriting profiles with size limit, check +/// the following: +/// 1. The file size of the new profile is within the size limit. +/// 2. The new profile is a subset of the old profile, and the content of every +/// sample in the new profile is unchanged. +/// Note that even though by default samples with fewest total count are dropped +/// first, this is not a requirement. Samples can be dropped by any order. +static ExpectedErrorOr RunTest(StringRef Input, size_t SizeLimit, + SampleProfileFormat Format) { + // Read Input profile. + auto FS = vfs::getRealFileSystem(); + LLVMContext Context; + auto InputBuffer = MemoryBuffer::getMemBuffer(Input); + DEF_VAR_RETURN_IF_ERROR( + Reader, SampleProfileReader::create(InputBuffer, Context, *FS)); + RETURN_IF_ERROR(Reader->read()); + SampleProfileMap OldProfiles = Reader->getProfiles(); + + // Rewrite it to a temp file with size limit. 
+ TempFile Temp("profile", "afdo", "", true); + bool isEmpty = false; + { + DEF_VAR_RETURN_IF_ERROR(Writer, + SampleProfileWriter::create(Temp.path(), Format)); + std::error_code EC = Writer->writeWithSizeLimit(OldProfiles, SizeLimit); + // too_large means no sample could be written because SizeLimit is too + // small. Otherwise any other error code indicates unexpected failure. + if (EC == sampleprof_error::too_large) + isEmpty = true; + else if (EC) + return EC; + } + + // Read the temp file to get new profiles. Use the default empty profile if + // temp file was not written because size limit is too small. + SampleProfileMap NewProfiles; + InputBuffer = MemoryBuffer::getMemBuffer(StringRef(EmptyProfile, 17)); + DEF_VAR_RETURN_IF_ERROR( + NewReader, SampleProfileReader::create(InputBuffer, Context, *FS)); + if (!isEmpty) { + VAR_RETURN_IF_ERROR(NewReader, SampleProfileReader::create( + Temp.path().str(), Context, *FS)); + RETURN_IF_ERROR(NewReader->read()); + NewProfiles = NewReader->getProfiles(); + } + + // Check temp file is actually within size limit. + uint64_t FileSize; + RETURN_IF_ERROR(sys::fs::file_size(Temp.path(), FileSize)); + EXPECT_LE(FileSize, SizeLimit); + + // For compact binary format, function names are stored as MD5, so we cannot + // directly match the samples of the new profile with the old profile. 
A + // simple way is to convert the old profile to compact binary format and read + // it back + if (Format == llvm::sampleprof::SPF_Compact_Binary) { + TempFile CompBinary("compbinary", "afdo", "", true); + { + DEF_VAR_RETURN_IF_ERROR( + Writer, SampleProfileWriter::create( + CompBinary.path(), llvm::sampleprof::SPF_Compact_Binary)); + RETURN_IF_ERROR(Writer->write(OldProfiles)); + } + VAR_RETURN_IF_ERROR(Reader, SampleProfileReader::create( + CompBinary.path().str(), Context, *FS)); + RETURN_IF_ERROR(Reader->read()); + OldProfiles = Reader->getProfiles(); + } + + // For every sample in the new profile, confirm it is in the old profile and + // unchanged. + for (auto Sample : NewProfiles) { + auto FindResult = OldProfiles.find(Sample.first); + EXPECT_NE(FindResult, OldProfiles.end()); + if (FindResult != OldProfiles.end()) { + EXPECT_EQ(Sample.second.getHeadSamples(), + FindResult->second.getHeadSamples()); + EXPECT_EQ(Sample.second, FindResult->second); + } + } + return nullptr; +} + +TEST(TestOutputSizeLimit, TestOutputSizeLimitExtBinary) { + for (size_t OutputSizeLimit : {490, 489, 488, 475, 474, 459, 400}) + ASSERT_THAT_EXPECTED( + RunTest(Input1, OutputSizeLimit, llvm::sampleprof::SPF_Ext_Binary), + Succeeded()); +} + +TEST(TestOutputSizeLimit, TestOutputSizeLimitBinary) { + for (size_t OutputSizeLimit : {250, 249, 248, 237, 236, 223, 200}) + ASSERT_THAT_EXPECTED( + RunTest(Input1, OutputSizeLimit, llvm::sampleprof::SPF_Binary), + Succeeded()); +} + +TEST(TestOutputSizeLimit, TestOutputSizeLimitCompBinary) { + for (size_t OutputSizeLimit : {277, 276, 275, 264, 263, 250, 200}) + ASSERT_THAT_EXPECTED( + RunTest(Input1, OutputSizeLimit, llvm::sampleprof::SPF_Compact_Binary), + Succeeded()); +} + +TEST(TestOutputSizeLimit, TestOutputSizeLimitText) { + for (size_t OutputSizeLimit : + {229, 228, 227, 213, 212, 211, 189, 188, 187, 186, 150}) + ASSERT_THAT_EXPECTED( + RunTest(Input1, OutputSizeLimit, llvm::sampleprof::SPF_Text), + Succeeded()); +} -- Gitee From 
0b15694ed967fc843786ae35e08df88767d0b68d Mon Sep 17 00:00:00 2001 From: wlei Date: Sun, 19 Mar 2023 22:37:01 -0700 Subject: [PATCH 03/13] [AutoFDO] Use flattened profiles for profile staleness metrics Previously, the profile staleness report only counted the top-level function samples in the nested profile; the samples in the inlinees were ignored. This could affect the quality of the metrics when there are heavily inlined functions. This change adds a feature to flatten the nested profile, and we're changing to use the flattened profile as the input for stale profile detection and matching. Example for profile flattening: ``` Original profile: _Z3bazi:20301:1000 1: 1000 3: 2000 5: inline1:1600 1: 600 3: inline2:500 1: 500 Flattened profile: _Z3bazi:18701:1000 1: 1000 3: 2000 5: 600 inline1:600 inline1:1100:600 1: 600 3: 500 inline2: 500 inline2:500:500 1: 500 ``` This feature could be useful for offline analysis, like understanding the hotness of each individual function. So I'm adding the support to `llvm-profdata merge` under `--gen-flattened-profile`. 
Reviewed By: hoy, wenlei Differential Revision: https://reviews.llvm.org/D146452 --- llvm/docs/CommandGuide/llvm-profdata.rst | 6 ++ llvm/include/llvm/ProfileData/SampleProf.h | 102 ++++++++++++++++-- llvm/lib/ProfileData/SampleProf.cpp | 18 ++-- llvm/lib/Transforms/IPO/SampleProfile.cpp | 28 ++++- .../Inputs/profile-mismatch-cs.prof | 18 ++++ .../Inputs/profile-mismatch.prof | 4 +- .../Transforms/SampleProfile/csspgo-inline.ll | 2 +- .../SampleProfile/csspgo-use-preinliner.ll | 2 +- .../profile-mismatch-flattened-profile.ll | 13 +++ .../SampleProfile/profile-mismatch.ll | 2 +- .../Inputs/sample-flatten-profile-cs.proftext | 20 ++++ .../Inputs/sample-flatten-profile.proftext | 44 ++++++++ .../cs-sample-nested-profile.test | 10 +- .../llvm-profdata/sample-flatten-profile.test | 50 +++++++++ llvm/tools/llvm-profdata/llvm-profdata.cpp | 36 ++++--- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 4 +- 16 files changed, 318 insertions(+), 41 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof create mode 100644 llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll create mode 100644 llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext create mode 100644 llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext create mode 100644 llvm/test/tools/llvm-profdata/sample-flatten-profile.test diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst index 89a624555fe4..1ed8531fe221 100644 --- a/llvm/docs/CommandGuide/llvm-profdata.rst +++ b/llvm/docs/CommandGuide/llvm-profdata.rst @@ -161,6 +161,12 @@ OPTIONS coverage for the optimized target. This option can only be used with sample-based profile in extbinary format. +.. option:: --convert-sample-profile-layout=[nest|flat] + + Convert the merged profile into a profile with a new layout. 
Supported + layouts are ``nest`` (Nested profile, the input should be CS flat profile) and + ``flat`` (Profile with nested inlinees flattened out). + .. option:: --supplement-instr-with-sample= Supplement an instrumentation profile with sample profile. The sample profile diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index faee9639ea86..8e76af2316ce 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -96,6 +96,12 @@ enum SampleProfileFormat { SPF_Binary = 0xff }; +enum SampleProfileLayout { + SPL_None = 0, + SPL_Nest = 0x1, + SPL_Flat = 0x2, +}; + static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) { return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) | uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) | @@ -747,6 +753,8 @@ public: void setTotalSamples(uint64_t Num) { TotalSamples = Num; } + void setHeadSamples(uint64_t Num) { TotalHeadSamples = Num; } + sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { bool Overflowed; TotalHeadSamples = @@ -934,6 +942,8 @@ public: return CallsiteSamples; } + CallsiteSampleMap &getCallsiteSamples() { return CallsiteSamples; } + /// Return the maximum of sample counts in a function body. When SkipCallSite /// is false, which is the default, the return count includes samples in the /// inlined functions. When SkipCallSite is true, the return count only @@ -1274,12 +1284,16 @@ private: SampleProfileMap &ProfileMap; }; -// CSProfileConverter converts a full context-sensitive flat sample profile into -// a nested context-sensitive sample profile. -class CSProfileConverter { +/// Helper class for profile conversion. +/// +/// It supports full context-sensitive profile to nested profile conversion, +/// nested profile to flattened profile conversion, etc.
+class ProfileConverter { public: - CSProfileConverter(SampleProfileMap &Profiles); - void convertProfiles(); + ProfileConverter(SampleProfileMap &Profiles); + // Convert a full context-sensitive flat sample profile into a nested sample + // profile. + void convertCSProfiles(); struct FrameNode { FrameNode(StringRef FName = StringRef(), FunctionSamples *FSamples = nullptr, @@ -1299,9 +1313,85 @@ public: StringRef CalleeName); }; + static void flattenProfile(SampleProfileMap &ProfileMap, + bool ProfileIsCS = false) { + SampleProfileMap TmpProfiles; + flattenProfile(ProfileMap, TmpProfiles, ProfileIsCS); + ProfileMap = std::move(TmpProfiles); + } + + static void flattenProfile(const SampleProfileMap &InputProfiles, + SampleProfileMap &OutputProfiles, + bool ProfileIsCS = false) { + if (ProfileIsCS) { + for (const auto &I : InputProfiles) + OutputProfiles[I.second.getName()].merge(I.second); + // Retain the profile name and clear the full context for each function + // profile. + for (auto &I : OutputProfiles) + I.second.setContext(SampleContext(I.first)); + } else { + for (const auto &I : InputProfiles) + flattenNestedProfile(OutputProfiles, I.second); + } + } + private: + static void flattenNestedProfile(SampleProfileMap &OutputProfiles, + const FunctionSamples &FS) { + // To retain the context, checksum, attributes of the original profile, make + // a copy of it if no profile is found. + SampleContext &Context = FS.getContext(); + auto Ret = OutputProfiles.emplace(Context, FS); + FunctionSamples &Profile = Ret.first->second; + if (Ret.second) { + // When it's the copy of the old profile, just clear all the inlinees' + // samples. + Profile.getCallsiteSamples().clear(); + // We recompute TotalSamples later, so here set to zero. 
+ Profile.setTotalSamples(0); + } else { + for (const auto &Line : FS.getBodySamples()) { + Profile.addBodySamples(Line.first.LineOffset, Line.first.Discriminator, + Line.second.getSamples()); + } + } + + assert(Profile.getCallsiteSamples().empty() && + "There should be no inlinees' profiles after flattening."); + + // TotalSamples might not be equal to the sum of all samples from + // BodySamples and CallsiteSamples. So here we use "TotalSamples = + // Original_TotalSamples - All_of_Callsite_TotalSamples + + // All_of_Callsite_HeadSamples" to compute the new TotalSamples. + uint64_t TotalSamples = FS.getTotalSamples(); + + for (const auto &I : FS.getCallsiteSamples()) { + for (const auto &Callee : I.second) { + const auto &CalleeProfile = Callee.second; + // Add body sample. + Profile.addBodySamples(I.first.LineOffset, I.first.Discriminator, + CalleeProfile.getHeadSamplesEstimate()); + // Add callsite sample. + Profile.addCalledTargetSamples( + I.first.LineOffset, I.first.Discriminator, CalleeProfile.getName(), + CalleeProfile.getHeadSamplesEstimate()); + // Update total samples. + TotalSamples = TotalSamples >= CalleeProfile.getTotalSamples() + ? TotalSamples - CalleeProfile.getTotalSamples() + : 0; + TotalSamples += CalleeProfile.getHeadSamplesEstimate(); + // Recursively convert callee profile. + flattenNestedProfile(OutputProfiles, CalleeProfile); + } + } + Profile.addTotalSamples(TotalSamples); + + Profile.setHeadSamples(Profile.getHeadSamplesEstimate()); + } + // Nest all children profiles into the profile of Node. 
- void convertProfiles(FrameNode &Node); + void convertCSProfiles(FrameNode &Node); FrameNode *getOrCreateContextPath(const SampleContext &Context); SampleProfileMap &ProfileMap; diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp index b4d5550a1721..f0a88c844cbb 100644 --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -461,9 +461,9 @@ void ProfileSymbolList::dump(raw_ostream &OS) const { OS << Sym << "\n"; } -CSProfileConverter::FrameNode * -CSProfileConverter::FrameNode::getOrCreateChildFrame( - const LineLocation &CallSite, StringRef CalleeName) { +ProfileConverter::FrameNode * +ProfileConverter::FrameNode::getOrCreateChildFrame(const LineLocation &CallSite, + StringRef CalleeName) { uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite); auto It = AllChildFrames.find(Hash); if (It != AllChildFrames.end()) { @@ -476,7 +476,7 @@ CSProfileConverter::FrameNode::getOrCreateChildFrame( return &AllChildFrames[Hash]; } -CSProfileConverter::CSProfileConverter(SampleProfileMap &Profiles) +ProfileConverter::ProfileConverter(SampleProfileMap &Profiles) : ProfileMap(Profiles) { for (auto &FuncSample : Profiles) { FunctionSamples *FSamples = &FuncSample.second; @@ -486,8 +486,8 @@ CSProfileConverter::CSProfileConverter(SampleProfileMap &Profiles) } } -CSProfileConverter::FrameNode * -CSProfileConverter::getOrCreateContextPath(const SampleContext &Context) { +ProfileConverter::FrameNode * +ProfileConverter::getOrCreateContextPath(const SampleContext &Context) { auto Node = &RootFrame; LineLocation CallSiteLoc(0, 0); for (auto &Callsite : Context.getContextFrames()) { @@ -497,14 +497,14 @@ CSProfileConverter::getOrCreateContextPath(const SampleContext &Context) { return Node; } -void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) { +void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) { // Process each child profile. 
Add each child profile to callsite profile map // of the current node `Node` if `Node` comes with a profile. Otherwise // promote the child profile to a standalone profile. auto *NodeProfile = Node.FuncSamples; for (auto &It : Node.AllChildFrames) { auto &ChildNode = It.second; - convertProfiles(ChildNode); + convertCSProfiles(ChildNode); auto *ChildProfile = ChildNode.FuncSamples; if (!ChildProfile) continue; @@ -544,4 +544,4 @@ void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) { } } -void CSProfileConverter::convertProfiles() { convertProfiles(RootFrame); } +void ProfileConverter::convertCSProfiles() { convertCSProfiles(RootFrame); } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 93b368fd72a6..46d82244875b 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -138,6 +138,11 @@ static cl::opt PersistProfileStaleness( cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section).")); +static cl::opt FlattenProfileForMatching( + "flatten-profile-for-matching", cl::Hidden, cl::init(true), + cl::desc( + "Use flattened profile for stale profile detection and matching.")); + static cl::opt ProfileSampleAccurate( "profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " @@ -428,6 +433,7 @@ class SampleProfileMatcher { Module &M; SampleProfileReader &Reader; const PseudoProbeManager *ProbeManager; + SampleProfileMap FlattenedProfiles; // Profile mismatching statstics. 
uint64_t TotalProfiledCallsites = 0; @@ -442,7 +448,21 @@ class SampleProfileMatcher { public: SampleProfileMatcher(Module &M, SampleProfileReader &Reader, const PseudoProbeManager *ProbeManager) - : M(M), Reader(Reader), ProbeManager(ProbeManager) {} + : M(M), Reader(Reader), ProbeManager(ProbeManager) { + if (FlattenProfileForMatching) { + ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, + FunctionSamples::ProfileIsCS); + } + } + + FunctionSamples *getFlattenedSamplesFor(const Function &F) { + StringRef CanonFName = FunctionSamples::getCanonicalFnName(F); + auto It = FlattenedProfiles.find(CanonFName); + if (It != FlattenedProfiles.end()) + return &It->second; + return nullptr; + } + void detectProfileMismatch(); void detectProfileMismatch(const Function &F, const FunctionSamples &FS); }; @@ -2149,7 +2169,11 @@ void SampleProfileMatcher::detectProfileMismatch() { for (auto &F : M) { if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile")) continue; - FunctionSamples *FS = Reader.getSamplesFor(F); + FunctionSamples *FS = nullptr; + if (FlattenProfileForMatching) + FS = getFlattenedSamplesFor(F); + else + FS = Reader.getSamplesFor(F); if (!FS) continue; detectProfileMismatch(F, *FS); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof new file mode 100644 index 000000000000..0a04602b2a01 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof @@ -0,0 +1,18 @@ +[main]:30:0 + 0: 0 + 1.1: 0 + 3: 10 matched:10 + 4: 10 + 5: 10 bar_mismatch:10 + 7: 5 foo:5 + 8: 0 +[main:7 @ foo]:15:5 + 1: 5 + 2: 5 + 3: 5 inlinee_mismatch:5 +[bar]:10:10 + 1: 10 +[matched]:10:10 + 1: 10 +[main:7 @ foo:3 @ inlinee_mismatch]:5:5 + 1: 5 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof index 0bb17b2f8f6e..818a048b8cab 100644 --- 
a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof +++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof @@ -5,9 +5,11 @@ main:30:0 4: 10 5: 10 bar_mismatch:10 8: 0 - 7: foo:10 + 7: foo:15 1: 5 2: 5 + 3: inlinee_mismatch:5 + 1: 5 bar:10:10 1: 10 matched:10:10 diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll index c88de5f56c74..177329f95497 100644 --- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll @@ -10,7 +10,7 @@ ; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/profile-context-tracker.prof -o %t.md5 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE -; RUN: llvm-profdata merge --sample --text --gen-cs-nested-profile %S/Inputs/profile-context-tracker.prof -o %t.prof +; RUN: llvm-profdata merge --sample --text --convert-sample-profile-layout=nest %S/Inputs/profile-context-tracker.prof -o %t.prof ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE ; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile diff --git a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll index da9c37937d2a..030b5aa18816 100644 --- a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof 
-pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE -; RUN: llvm-profdata merge --sample --text --gen-cs-nested-profile -generate-merged-base-profiles=0 %S/Inputs/csspgo-use-preinliner.prof -o %t.prof +; RUN: llvm-profdata merge --sample --text --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 %S/Inputs/csspgo-use-preinliner.prof -o %t.prof ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll new file mode 100644 index 000000000000..ef11652fd1a8 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll @@ -0,0 +1,13 @@ +; REQUIRES: x86_64-linux +; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll +; RUN: FileCheck %s --input-file %t +; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD + +; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch-cs.prof -report-profile-staleness 
-persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll +; RUN: FileCheck %s --input-file %t +; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD + + +; CHECK: (3/4) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch. + +; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 3, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30} diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll index cf07974da27f..8340c3b0e62d 100644 --- a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll +++ b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll @@ -1,5 +1,5 @@ ; REQUIRES: x86_64-linux -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -S 2>%t -o %t.ll +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=0 -S 2>%t -o %t.ll ; RUN: FileCheck %s --input-file %t ; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD ; RUN: llc < %t.ll -filetype=obj -o %t.obj diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext new file mode 100644 index 000000000000..5cd880b63baa --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext @@ -0,0 +1,20 @@ +[baz]:150:10 + 1: 10 + 3: 20 + 5: 20 foo:20 +[foo]:102:1 + 1: 1 + 3: 1 +[main]:91:1 + 4: 1 + 4.2: 1 + 7: 1 + 9: 3 bar:2 foo:1 + 10: 3 baz:2 foo:1 +[main:10 @ foo]:2:1 + 3: 1 bar:1 + 4: 1 +[bar]:1:1 + 1: 1 +[main:10 @ foo:3 @ bar]:1:1 + 1: 1 diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext 
b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext new file mode 100644 index 000000000000..46564f65121e --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext @@ -0,0 +1,44 @@ +baz:160:10 + 1: 10 + 3: 20 + 5: foo:30 + 1: 20 + 3: bar:10 + 1: 10 + !CFGChecksum: 4 + !Attributes: 4 + !CFGChecksum: 3 + !Attributes: 3 + !CFGChecksum: 1 + !Attributes: 1 +main:110:1 + 4: 1 + 4.2: 1 + 7: 1 + 9: 3 bar:2 foo:1 + 10: foo:2 + 4: 1 + 3: bar:1 + 1: 1 + !CFGChecksum: 4 + !Attributes: 4 + !CFGChecksum: 3 + !Attributes: 3 + 10: baz:20 + 10: 1 + 6: bar:3 + 1: 2 + 7: 1 + !CFGChecksum: 4 + !Attributes: 4 + !CFGChecksum: 2 + !Attributes: 2 +foo:102:1 + 1: 1 + 3: 1 + !CFGChecksum: 3 + !Attributes: 3 +bar:1:1 + 1: 1 + !CFGChecksum: 4 + !Attributes: 4 diff --git a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test index d458d7fad1be..7b0132421911 100644 --- a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test +++ b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test @@ -1,14 +1,14 @@ -RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0 +RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -RUN: llvm-profdata merge --sample --text -output=%t.probe.proftext %S/Inputs/cs-sample-preinline-probe.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0 +RUN: llvm-profdata merge --sample --text -output=%t.probe.proftext %S/Inputs/cs-sample-preinline-probe.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 RUN: FileCheck %s < %t.probe.proftext --match-full-lines --strict-whitespace -check-prefix=PROBE -RUN: llvm-profdata merge 
--sample --extbinary -output=%t.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0 +RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 RUN: llvm-profdata merge --sample --text -output=%t2.proftext %t.profbin RUN: FileCheck %s < %t2.proftext --match-full-lines --strict-whitespace RUN: llvm-profdata show --sample -show-sec-info-only %t.profbin | FileCheck %s -check-prefix=PREINLINE -RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1 +RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=1 RUN: FileCheck %s < %t3.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT -RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1 +RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=1 RUN: llvm-profdata show -sample -detailed-summary %S/Inputs/cs-sample-preinline.proftext | FileCheck %s -check-prefix=SUMMARY RUN: llvm-profdata show -sample -detailed-summary %t2.profbin | FileCheck %s -check-prefix=SUMMARY-NEST RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -check-prefix=SUMMARY-NEST diff --git a/llvm/test/tools/llvm-profdata/sample-flatten-profile.test b/llvm/test/tools/llvm-profdata/sample-flatten-profile.test new file mode 100644 index 000000000000..90effcb25190 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/sample-flatten-profile.test @@ -0,0 +1,50 @@ +; RUN: llvm-profdata merge --sample 
--convert-sample-profile-layout=flat --text %S/Inputs/sample-flatten-profile.proftext -o - | FileCheck %s --match-full-lines --strict-whitespace +; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/sample-flatten-profile.proftext -o %t2 && llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %t2 -o - | FileCheck %s --match-full-lines --strict-whitespace + +; RUN: llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %S/Inputs/sample-flatten-profile-cs.proftext -o - | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=CHECK-CS +; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/sample-flatten-profile-cs.proftext -o %t2 && llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %t2 -o - | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=CHECK-CS + +; CHECK:baz:169:10 +; CHECK-NEXT: 1: 10 +; CHECK-NEXT: 3: 20 +; CHECK-NEXT: 5: 20 foo:20 +; CHECK-NEXT: 6: 2 bar:2 +; CHECK-NEXT: 10: 1 +; CHECK-NEXT: !CFGChecksum: 1 +; CHECK-NEXT: !Attributes: 1 +; CHECK-NEXT:foo:134:21 +; CHECK-NEXT: 1: 21 +; CHECK-NEXT: 3: 12 bar:11 +; CHECK-NEXT: 4: 1 +; CHECK-NEXT: !CFGChecksum: 3 +; CHECK-NEXT: !Attributes: 3 +; CHECK-NEXT:main:91:1 +; CHECK-NEXT: 4: 1 +; CHECK-NEXT: 4.2: 1 +; CHECK-NEXT: 7: 1 +; CHECK-NEXT: 9: 3 bar:2 foo:1 +; CHECK-NEXT: 10: 3 baz:2 foo:1 +; CHECK-NEXT: !CFGChecksum: 2 +; CHECK-NEXT: !Attributes: 2 +; CHECK-NEXT:bar:15:14 +; CHECK-NEXT: 1: 14 +; CHECK-NEXT: 7: 1 +; CHECK-NEXT: !CFGChecksum: 4 +; CHECK-NEXT: !Attributes: 4 + +; CHECK-CS:baz:150:10 +; CHECK-CS-NEXT: 1: 10 +; CHECK-CS-NEXT: 3: 20 +; CHECK-CS-NEXT: 5: 20 foo:20 +; CHECK-CS-NEXT:foo:104:2 +; CHECK-CS-NEXT: 1: 1 +; CHECK-CS-NEXT: 3: 2 bar:1 +; CHECK-CS-NEXT: 4: 1 +; CHECK-CS-NEXT:main:91:1 +; CHECK-CS-NEXT: 4: 1 +; CHECK-CS-NEXT: 4.2: 1 +; CHECK-CS-NEXT: 7: 1 +; CHECK-CS-NEXT: 9: 3 bar:2 foo:1 +; CHECK-CS-NEXT: 10: 3 baz:2 foo:1 +; CHECK-CS-NEXT:bar:2:2 +; CHECK-CS-NEXT: 1: 2 diff --git 
a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index eafeb299c52d..a976f0604c1f 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -963,7 +963,8 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, StringRef OutputFilename, ProfileFormat OutputFormat, StringRef ProfileSymbolListFile, bool CompressAllSections, - bool UseMD5, bool GenPartialProfile, bool GenCSNestedProfile, + bool UseMD5, bool GenPartialProfile, + SampleProfileLayout ProfileLayout, bool SampleMergeColdContext, bool SampleTrimColdContext, bool SampleColdContextFrameDepth, FailureMode FailMode, bool DropProfileSymbolList, size_t OutputSizeLimit) { @@ -1042,9 +1043,12 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, SampleMergeColdContext, SampleColdContextFrameDepth, false); } - if (ProfileIsCS && GenCSNestedProfile) { - CSProfileConverter CSConverter(ProfileMap); - CSConverter.convertProfiles(); + if (ProfileLayout == llvm::sampleprof::SPL_Flat) { + ProfileConverter::flattenProfile(ProfileMap, FunctionSamples::ProfileIsCS); + ProfileIsCS = FunctionSamples::ProfileIsCS = false; + } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) { + ProfileConverter CSConverter(ProfileMap); + CSConverter.convertCSProfiles(); ProfileIsCS = FunctionSamples::ProfileIsCS = false; } @@ -1235,9 +1239,15 @@ static int merge_main(int argc, const char *argv[]) { "instr-prof-cold-threshold", cl::init(0), cl::Hidden, cl::desc("User specified cold threshold for instr profile which will " "override the cold threshold got from profile summary. 
")); - cl::opt GenCSNestedProfile( - "gen-cs-nested-profile", cl::Hidden, cl::init(false), - cl::desc("Generate nested function profiles for CSSPGO")); + cl::opt ProfileLayout( + "convert-sample-profile-layout", + cl::desc("Convert the generated profile to a profile with a new layout"), + cl::init(SPL_None), + cl::values( + clEnumValN(SPL_Nest, "nest", + "Nested profile, the input should be CS flat profile"), + clEnumValN(SPL_Flat, "flat", + "Profile with nested inlinee flatten out"))); cl::opt DebugInfoFilename( "debug-info", cl::init(""), cl::desc("Use the provided debug info to correlate the raw profile.")); @@ -1292,12 +1302,12 @@ static int merge_main(int argc, const char *argv[]) { OutputFilename, OutputFormat, OutputSparse, NumThreads, FailureMode, ProfiledBinary); else - mergeSampleProfile( - WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, - ProfileSymbolListFile, CompressAllSections, UseMD5, GenPartialProfile, - GenCSNestedProfile, SampleMergeColdContext, SampleTrimColdContext, - SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList, - OutputSizeLimit); + mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, + OutputFormat, ProfileSymbolListFile, CompressAllSections, + UseMD5, GenPartialProfile, ProfileLayout, + SampleMergeColdContext, SampleTrimColdContext, + SampleColdContextFrameDepth, FailureMode, + DropProfileSymbolList, OutputSizeLimit); return 0; } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index dfc42a5f4e02..2728f80da64b 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -1026,8 +1026,8 @@ void CSProfileGenerator::postProcessProfiles() { calculateAndShowDensity(ContextLessProfiles); if (GenCSNestedProfile) { - CSProfileConverter CSConverter(ProfileMap); - CSConverter.convertProfiles(); + ProfileConverter CSConverter(ProfileMap); + CSConverter.convertCSProfiles(); 
FunctionSamples::ProfileIsCS = false; } } -- Gitee From 1325d813a3ea0d747328703c8cecda33ec46746e Mon Sep 17 00:00:00 2001 From: wlei Date: Mon, 27 Mar 2023 11:21:24 -0700 Subject: [PATCH 04/13] [SamplePGO] Stale profile matching(part 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AutoFDO/CSSPGO often has to deal with stale profiles collected on binaries built from several revisions behind release. Using a stale profile is likely to produce incorrect profile annotations, which results in unstable or low-performing binaries. Currently, for a source-location-based profile, once a code change causes a profile mismatch, all the locations afterward are mismatched and the affected samples or inlining info are lost. If we can provide a matching framework to reuse parts of the mismatched profile - aka incremental PGO - it will make PGO more stable, increase the optimization coverage, and boost the performance of the binary. This patch is part 1 of stale profile matching. Summary of the implementation: - Added a structure for the matching result: `LocToLocMap`, which is a location-to-location map meaning the location of the current build is matched to the location of the previous build (to be used to query the “stale” profile). - In order to use the matching results for sample query, we need to pass them to all the location queries. For code cleanliness, we added a new pointer field (`IRToProfileLocationMap`) to `FunctionSamples`. - Added a wrapper (`mapIRLocToProfileLoc`) for the location query; the location from the input IR will be remapped to the matched profile location. - Added a new switch `--salvage-stale-profile`. - Some refactoring for the staleness detection. The test case is in part 2, together with the matching algorithm.
Reviewed By: wenlei Differential Revision: https://reviews.llvm.org/D147456 --- llvm/include/llvm/ProfileData/SampleProf.h | 51 ++++++++- llvm/lib/ProfileData/SampleProf.cpp | 2 +- llvm/lib/Transforms/IPO/SampleProfile.cpp | 118 ++++++++++++--------- 3 files changed, 117 insertions(+), 54 deletions(-) diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 8e76af2316ce..4b57dbafada0 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -723,6 +723,8 @@ using BodySampleMap = std::map; // memory, which is *very* significant for large profiles. using FunctionSamplesMap = std::map>; using CallsiteSampleMap = std::map; +using LocToLocMap = + std::unordered_map; /// Representation of the samples collected for a function. /// @@ -837,12 +839,26 @@ public: } } + // Query the stale profile matching results and remap the location. + const LineLocation &mapIRLocToProfileLoc(const LineLocation &IRLoc) const { + // There is no remapping if the profile is not stale or the matching gives + // the same location. + if (!IRToProfileLocationMap) + return IRLoc; + const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc); + if (ProfileLoc != IRToProfileLocationMap->end()) + return ProfileLoc->second; + else + return IRLoc; + } + /// Return the number of samples collected at the given location. /// Each location is specified by \p LineOffset and \p Discriminator. /// If the location is not found in profile, return error. ErrorOr findSamplesAt(uint32_t LineOffset, uint32_t Discriminator) const { - const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator)); + const auto &ret = BodySamples.find( + mapIRLocToProfileLoc(LineLocation(LineOffset, Discriminator))); if (ret == BodySamples.end()) return std::error_code(); return ret->second.getSamples(); @@ -853,7 +869,8 @@ public: /// If the location is not found in profile, return error. 
ErrorOr findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const { - const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator)); + const auto &ret = BodySamples.find( + mapIRLocToProfileLoc(LineLocation(LineOffset, Discriminator))); if (ret == BodySamples.end()) return std::error_code(); return ret->second.getCallTargets(); @@ -863,7 +880,7 @@ public: /// CallSite. If the location is not found in profile, return error. ErrorOr findCallTargetMapAt(const LineLocation &CallSite) const { - const auto &Ret = BodySamples.find(CallSite); + const auto &Ret = BodySamples.find(mapIRLocToProfileLoc(CallSite)); if (Ret == BodySamples.end()) return std::error_code(); return Ret->second.getCallTargets(); @@ -871,13 +888,13 @@ public: /// Return the function samples at the given callsite location. FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) { - return CallsiteSamples[Loc]; + return CallsiteSamples[mapIRLocToProfileLoc(Loc)]; } /// Returns the FunctionSamplesMap at the given \p Loc. const FunctionSamplesMap * findFunctionSamplesMapAt(const LineLocation &Loc) const { - auto iter = CallsiteSamples.find(Loc); + auto iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc)); if (iter == CallsiteSamples.end()) return nullptr; return &iter->second; @@ -1041,6 +1058,11 @@ public: uint64_t getFunctionHash() const { return FunctionHash; } + void setIRToProfileLocationMap(const LocToLocMap *LTLM) { + assert(IRToProfileLocationMap == nullptr && "this should be set only once"); + IRToProfileLocationMap = LTLM; + } + /// Return the canonical name for a function, taking into account /// suffix elision policy attributes. static StringRef getCanonicalFnName(const Function &F) { @@ -1224,6 +1246,25 @@ private: /// in the call to bar() at line offset 1, the other for all the samples /// collected in the call to baz() at line offset 8. CallsiteSampleMap CallsiteSamples; + + /// IR to profile location map generated by stale profile matching. 
+ /// + /// Each entry is a mapping from the location on current build to the matched + /// location in the "stale" profile. For example: + /// Profiled source code: + /// void foo() { + /// 1 bar(); + /// } + /// + /// Current source code: + /// void foo() { + /// 1 // Code change + /// 2 bar(); + /// } + /// Supposing the stale profile matching algorithm generated the mapping [2 -> + /// 1], the profile query using the location of bar on the IR which is 2 will + /// be remapped to 1 and find the location of bar in the profile. + const LocToLocMap *IRToProfileLocationMap = nullptr; }; raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS); diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp index f0a88c844cbb..780f517d3e79 100644 --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -291,7 +291,7 @@ const FunctionSamples *FunctionSamples::findFunctionSamplesAt( std::string CalleeGUID; CalleeName = getRepInFormat(CalleeName, UseMD5, CalleeGUID); - auto iter = CallsiteSamples.find(Loc); + auto iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc)); if (iter == CallsiteSamples.end()) return nullptr; auto FS = iter->second.find(CalleeName); diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 46d82244875b..d5fcd9121d80 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -129,6 +129,11 @@ static cl::opt SampleProfileRemappingFile( "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden); +static cl::opt SalvageStaleProfile( + "salvage-stale-profile", cl::Hidden, cl::init(false), + cl::desc("Salvage stale profile by fuzzy matching and use the remapped " + "location for sample profile query.")); + static cl::opt ReportProfileStaleness( "report-profile-staleness", cl::Hidden, cl::init(false), 
cl::desc("Compute and report stale profile statistical metrics.")); @@ -454,7 +459,9 @@ public: FunctionSamples::ProfileIsCS); } } + void runOnModule(); +private: FunctionSamples *getFlattenedSamplesFor(const Function &F) { StringRef CanonFName = FunctionSamples::getCanonicalFnName(F); auto It = FlattenedProfiles.find(CanonFName); @@ -462,9 +469,11 @@ public: return &It->second; return nullptr; } - - void detectProfileMismatch(); - void detectProfileMismatch(const Function &F, const FunctionSamples &FS); + void runOnFunction(const Function &F, const FunctionSamples &FS); + void countProfileMismatches( + const FunctionSamples &FS, + const std::unordered_set + &MatchedCallsiteLocs); }; /// Sample profile pass. @@ -2066,7 +2075,8 @@ bool SampleProfileLoader::doInitialization(Module &M, } } - if (ReportProfileStaleness || PersistProfileStaleness) { + if (ReportProfileStaleness || PersistProfileStaleness || + SalvageStaleProfile) { MatchingManager = std::make_unique(M, *Reader, ProbeManager.get()); } @@ -2074,8 +2084,53 @@ bool SampleProfileLoader::doInitialization(Module &M, return true; } -void SampleProfileMatcher::detectProfileMismatch(const Function &F, - const FunctionSamples &FS) { +void SampleProfileMatcher::countProfileMismatches( + const FunctionSamples &FS, + const std::unordered_set + &MatchedCallsiteLocs) { + + auto isInvalidLineOffset = [](uint32_t LineOffset) { + return LineOffset & 0x8000; + }; + + // Check if there are any callsites in the profile that does not match to any + // IR callsites, those callsite samples will be discarded. 
+ for (auto &I : FS.getBodySamples()) { + const LineLocation &Loc = I.first; + if (isInvalidLineOffset(Loc.LineOffset)) + continue; + + uint64_t Count = I.second.getSamples(); + if (!I.second.getCallTargets().empty()) { + TotalCallsiteSamples += Count; + TotalProfiledCallsites++; + if (!MatchedCallsiteLocs.count(Loc)) { + MismatchedCallsiteSamples += Count; + NumMismatchedCallsites++; + } + } + } + + for (auto &I : FS.getCallsiteSamples()) { + const LineLocation &Loc = I.first; + if (isInvalidLineOffset(Loc.LineOffset)) + continue; + + uint64_t Count = 0; + for (auto &FM : I.second) { + Count += FM.second.getHeadSamplesEstimate(); + } + TotalCallsiteSamples += Count; + TotalProfiledCallsites++; + if (!MatchedCallsiteLocs.count(Loc)) { + MismatchedCallsiteSamples += Count; + NumMismatchedCallsites++; + } + } +} + +void SampleProfileMatcher::runOnFunction(const Function &F, + const FunctionSamples &FS) { if (FunctionSamples::ProfileIsProbeBased) { uint64_t Count = FS.getTotalSamples(); TotalFuncHashSamples += Count; @@ -2125,47 +2180,12 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F, } } - auto isInvalidLineOffset = [](uint32_t LineOffset) { - return LineOffset & 0x8000; - }; - - // Check if there are any callsites in the profile that does not match to any - // IR callsites, those callsite samples will be discarded. 
- for (auto &I : FS.getBodySamples()) { - const LineLocation &Loc = I.first; - if (isInvalidLineOffset(Loc.LineOffset)) - continue; - - uint64_t Count = I.second.getSamples(); - if (!I.second.getCallTargets().empty()) { - TotalCallsiteSamples += Count; - TotalProfiledCallsites++; - if (!MatchedCallsiteLocs.count(Loc)) { - MismatchedCallsiteSamples += Count; - NumMismatchedCallsites++; - } - } - } - - for (auto &I : FS.getCallsiteSamples()) { - const LineLocation &Loc = I.first; - if (isInvalidLineOffset(Loc.LineOffset)) - continue; - - uint64_t Count = 0; - for (auto &FM : I.second) { - Count += FM.second.getHeadSamplesEstimate(); - } - TotalCallsiteSamples += Count; - TotalProfiledCallsites++; - if (!MatchedCallsiteLocs.count(Loc)) { - MismatchedCallsiteSamples += Count; - NumMismatchedCallsites++; - } - } + // Detect profile mismatch for profile staleness metrics report. + if (ReportProfileStaleness || PersistProfileStaleness) + countProfileMismatches(FS, MatchedCallsiteLocs); } -void SampleProfileMatcher::detectProfileMismatch() { +void SampleProfileMatcher::runOnModule() { for (auto &F : M) { if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile")) continue; @@ -2176,7 +2196,7 @@ void SampleProfileMatcher::detectProfileMismatch() { FS = Reader.getSamplesFor(F); if (!FS) continue; - detectProfileMismatch(F, *FS); + runOnFunction(F, *FS); } if (ReportProfileStaleness) { @@ -2264,8 +2284,10 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, assert(SymbolMap.count(StringRef()) == 0 && "No empty StringRef should be added in SymbolMap"); - if (ReportProfileStaleness || PersistProfileStaleness) - MatchingManager->detectProfileMismatch(); + if (ReportProfileStaleness || PersistProfileStaleness || + SalvageStaleProfile) { + MatchingManager->runOnModule(); + } bool retval = false; for (auto *F : buildFunctionOrder(M, CG)) { -- Gitee From a70f04ebc875ed9e731ab7dbf1dc51aa8ab89afd Mon Sep 17 00:00:00 2001 From: wlei Date: Wed, 5 Apr 2023 
21:21:59 -0700 Subject: [PATCH 05/13] [SamplePGO] Stale profile matching(part 2) Part 2 of https://reviews.llvm.org/D147456 Use callee name on IR as an anchor to match the call target/inlinee name in the profile. The advantages of this in particular: - Different from the traditional way of encoding hash signatures to every block that would affect binary/profile size and build speed, it doesn't require any additional information for this; all the data is already in the IR and profiles. - Effective for current nested profile layout in which once a callsite is mismatched all the inlinee's profiles are dropped. **The input of the algorithm:** - IR locations: the anchor is the callee name of direct callsite. - Profile locations: the anchor is the call target name for `BodySample`s or inlinee's profile name for `CallsiteSamples`. The two lists are populated by parsing the IR and profile and both can be generalized as a sequence of locations with an optional anchor. For example: say location `1.2(foo)` refers to a callsite at `1.2` with callee name `foo` and `1.3` refers to a non-direct-call location `1.3`. ``` // The current build source code: int main() { 1. ... 2. foo(); 3. ... 4 ... 5. ... 6. bar(); 7. ... } ``` IR locations are populated and simplified as: `[1, 2(foo), 3, 5, 6(bar), 7]`. ``` ; The "stale" profile: main:350:1 1: 1 2: 3 3: 100 foo:100 4: 2 7: 2 8: 200 bar:200 9: 30 ``` Profile locations are populated and simplified as `[1, 2, 3(foo), 4, 7, 8(bar), 9]` **Matching heuristic:** - Match all the anchors in lexical order first. - Match non-anchors evenly between two anchors: Split the non-anchor range, the first half is matched based on the start anchor, the second half is matched based on the end anchor. So the example above is matched like: ``` [1, 2(foo), 3, 5, 6(bar), 7] | | | | | | [1, 2, 3(foo), 4, 7, 8(bar), 9] ``` 3 -> 4 matching is based on anchor `foo`, 5 -> 7 matching is based on anchor `bar`.
The output mapping of matching is [2->3, 3->4, 5->7, 6->8, 7->9]. For the implementation, the anchors are saved in a map for fast look-up. The result mapping is saved into `IRToProfileLocationMap` (see https://reviews.llvm.org/D147456) and distributed to all FunctionSamples (`distributeIRToProfileLocationMap`). **Clang-self build benchmark:** Current build version: clang-10 The profiled version: clang-9 Results are compared to a refresh profile (collected profile on clang-10) and, to be fair, we invalidated new functions' profiles (both refresh and stale profile use the same profile list). 1) Regression to using refresh profile with this off : -3.93% 2) Regression to using refresh profile with this on : -1.1% So this algorithm can recover ~72% of the regression. **Internal(Meta) large-scale services.** We saw one real instance of a 3-week stale profile; it delivered a ~1.8% win. **Notes or future work:** - Classic AutoFDO support: the current version only supports pseudo-probe, but I believe it's not hard to extend to classic line-number based AutoFDO since pseudo-probe and line-number share the LineLocation structure. - The fuzzy matching is an open-ended area and there could be more heuristics to try out, but since the current version already recovers a reasonable percentage of regression (with some pseudo probe order change, it can recover close to 90%), I'm submitting the patch for review and we will try more heuristics in the future. - Profile call target names are only available when the call is hit by samples; a missing anchor might mislead the matching. This can be mitigated in llvm-profgen by generating the call target for the zero samples. - This doesn't handle function name mismatch; we plan to solve it in the future.
Reviewed By: hoy, wenlei Differential Revision: https://reviews.llvm.org/D147545 --- llvm/lib/Transforms/IPO/SampleProfile.cpp | 232 +++++++++++- .../pseudo-probe-stale-profile-matching.prof | 28 ++ .../pseudo-probe-stale-profile-matching.ll | 342 ++++++++++++++++++ 3 files changed, 589 insertions(+), 13 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching.prof create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index d5fcd9121d80..66d7ee54d89f 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -439,6 +439,10 @@ class SampleProfileMatcher { SampleProfileReader &Reader; const PseudoProbeManager *ProbeManager; SampleProfileMap FlattenedProfiles; + // For each function, the matcher generates a map, of which each entry is a + // mapping from the source location of current build to the source location in + // the profile. + StringMap FuncMappings; // Profile mismatching statstics. uint64_t TotalProfiledCallsites = 0; @@ -473,7 +477,23 @@ private: void countProfileMismatches( const FunctionSamples &FS, const std::unordered_set - &MatchedCallsiteLocs); + &MatchedCallsiteLocs, + uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites); + + LocToLocMap &getIRToProfileLocationMap(const Function &F) { + auto Ret = FuncMappings.try_emplace( + FunctionSamples::getCanonicalFnName(F.getName()), LocToLocMap()); + return Ret.first->second; + } + void distributeIRToProfileLocationMap(); + void distributeIRToProfileLocationMap(FunctionSamples &FS); + void populateProfileCallsites( + const FunctionSamples &FS, + StringMap> &CalleeToCallsitesMap); + void runStaleProfileMatching( + const std::map &IRLocations, + StringMap> &CalleeToCallsitesMap, + LocToLocMap &IRToProfileLocationMap); }; /// Sample profile pass. 
@@ -1813,9 +1833,10 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { if (!ProbeManager->profileIsValid(F, *Samples)) { LLVM_DEBUG( dbgs() << "Profile is invalid due to CFG mismatch for Function " - << F.getName()); + << F.getName() << "\n"); ++NumMismatchedProfile; - return false; + if (!SalvageStaleProfile) + return false; } ++NumMatchedProfile; } else { @@ -2087,7 +2108,8 @@ bool SampleProfileLoader::doInitialization(Module &M, void SampleProfileMatcher::countProfileMismatches( const FunctionSamples &FS, const std::unordered_set - &MatchedCallsiteLocs) { + &MatchedCallsiteLocs, + uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites) { auto isInvalidLineOffset = [](uint32_t LineOffset) { return LineOffset & 0x8000; @@ -2103,10 +2125,10 @@ void SampleProfileMatcher::countProfileMismatches( uint64_t Count = I.second.getSamples(); if (!I.second.getCallTargets().empty()) { TotalCallsiteSamples += Count; - TotalProfiledCallsites++; + FuncProfiledCallsites++; if (!MatchedCallsiteLocs.count(Loc)) { MismatchedCallsiteSamples += Count; - NumMismatchedCallsites++; + FuncMismatchedCallsites++; } } } @@ -2121,16 +2143,129 @@ void SampleProfileMatcher::countProfileMismatches( Count += FM.second.getHeadSamplesEstimate(); } TotalCallsiteSamples += Count; - TotalProfiledCallsites++; + FuncProfiledCallsites++; if (!MatchedCallsiteLocs.count(Loc)) { MismatchedCallsiteSamples += Count; - NumMismatchedCallsites++; + FuncMismatchedCallsites++; + } + } +} + +// Populate the anchors(direct callee name) from profile. +void SampleProfileMatcher::populateProfileCallsites( + const FunctionSamples &FS, + StringMap> &CalleeToCallsitesMap) { + for (const auto &I : FS.getBodySamples()) { + const auto &Loc = I.first; + const auto &CTM = I.second.getCallTargets(); + // Filter out possible indirect calls, use direct callee name as anchor. 
+ if (CTM.size() == 1) { + StringRef CalleeName = CTM.begin()->first(); + const auto &Candidates = CalleeToCallsitesMap.try_emplace( + CalleeName, std::set()); + Candidates.first->second.insert(Loc); + } + } + + for (const auto &I : FS.getCallsiteSamples()) { + const LineLocation &Loc = I.first; + const auto &CalleeMap = I.second; + // Filter out possible indirect calls, use direct callee name as anchor. + if (CalleeMap.size() == 1) { + StringRef CalleeName = CalleeMap.begin()->first; + const auto &Candidates = CalleeToCallsitesMap.try_emplace( + CalleeName, std::set()); + Candidates.first->second.insert(Loc); + } + } +} + +// Call target name anchor based profile fuzzy matching. +// Input: +// For IR locations, the anchor is the callee name of direct callsite; For +// profile locations, it's the call target name for BodySamples or inlinee's +// profile name for CallsiteSamples. +// Matching heuristic: +// First match all the anchors in lexical order, then split the non-anchor +// locations between the two anchors evenly, first half are matched based on the +// start anchor, second half are matched based on the end anchor. +// For example, given: +// IR locations: [1, 2(foo), 3, 5, 6(bar), 7] +// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9] +// The matching gives: +// [1, 2(foo), 3, 5, 6(bar), 7] +// | | | | | | +// [1, 2, 3(foo), 4, 7, 8(bar), 9] +// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9]. +void SampleProfileMatcher::runStaleProfileMatching( + const std::map &IRLocations, + StringMap> &CalleeToCallsitesMap, + LocToLocMap &IRToProfileLocationMap) { + assert(IRToProfileLocationMap.empty() && + "Run stale profile matching only once per function"); + + auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) { + // Skip the unchanged location mapping to save memory. + if (From != To) + IRToProfileLocationMap.insert({From, To}); + }; + + // Use function's beginning location as the initial anchor. 
+ int32_t LocationDelta = 0; + SmallVector LastMatchedNonAnchors; + + for (const auto &IR : IRLocations) { + const auto &Loc = IR.first; + StringRef CalleeName = IR.second; + bool IsMatchedAnchor = false; + // Match the anchor location in lexical order. + if (!CalleeName.empty()) { + auto ProfileAnchors = CalleeToCallsitesMap.find(CalleeName); + if (ProfileAnchors != CalleeToCallsitesMap.end() && + !ProfileAnchors->second.empty()) { + auto CI = ProfileAnchors->second.begin(); + const auto &Candidate = *CI; + ProfileAnchors->second.erase(CI); + InsertMatching(Loc, Candidate); + LLVM_DEBUG(dbgs() << "Callsite with callee:" << CalleeName + << " is matched from " << Loc << " to " << Candidate + << "\n"); + LocationDelta = Candidate.LineOffset - Loc.LineOffset; + + // Match backwards for non-anchor locations. + // The locations in LastMatchedNonAnchors have been matched forwards + // based on the previous anchor, spilt it evenly and overwrite the + // second half based on the current anchor. + for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2; + I < LastMatchedNonAnchors.size(); I++) { + const auto &L = LastMatchedNonAnchors[I]; + uint32_t CandidateLineOffset = L.LineOffset + LocationDelta; + LineLocation Candidate(CandidateLineOffset, L.Discriminator); + InsertMatching(L, Candidate); + LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L + << " to " << Candidate << "\n"); + } + + IsMatchedAnchor = true; + LastMatchedNonAnchors.clear(); + } + } + + // Match forwards for non-anchor locations. 
+ if (!IsMatchedAnchor) { + uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta; + LineLocation Candidate(CandidateLineOffset, Loc.Discriminator); + InsertMatching(Loc, Candidate); + LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to " + << Candidate << "\n"); + LastMatchedNonAnchors.emplace_back(Loc); } } } void SampleProfileMatcher::runOnFunction(const Function &F, const FunctionSamples &FS) { + bool IsFuncHashMismatch = false; if (FunctionSamples::ProfileIsProbeBased) { uint64_t Count = FS.getTotalSamples(); TotalFuncHashSamples += Count; @@ -2138,16 +2273,24 @@ void SampleProfileMatcher::runOnFunction(const Function &F, if (!ProbeManager->profileIsValid(F, FS)) { MismatchedFuncHashSamples += Count; NumMismatchedFuncHash++; - return; + IsFuncHashMismatch = true; } } std::unordered_set MatchedCallsiteLocs; + // The value of the map is the name of direct callsite and use empty StringRef + // for non-direct-call site. + std::map IRLocations; - // Go through all the callsites on the IR and flag the callsite if the target - // name is the same as the one in the profile. + // Extract profile matching anchors and profile mismatch metrics in the IR. for (auto &BB : F) { for (auto &I : BB) { + // TODO: Support line-number based location(AutoFDO). + if (FunctionSamples::ProfileIsProbeBased && isa(&I)) { + if (std::optional Probe = extractProbe(I)) + IRLocations.emplace(LineLocation(Probe->Id, 0), StringRef()); + } + if (!isa(&I) || isa(&I)) continue; @@ -2159,6 +2302,17 @@ void SampleProfileMatcher::runOnFunction(const Function &F, if (Function *Callee = CB->getCalledFunction()) CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName()); + // Force to overwrite the callee name in case any non-call location was + // written before. 
+ auto R = IRLocations.emplace(IRCallsite, CalleeName); + R.first->second = CalleeName; + assert((!FunctionSamples::ProfileIsProbeBased || R.second || + R.first->second == CalleeName) && + "Overwrite non-call or different callee name location for " + "pseudo probe callsite"); + + // Go through all the callsites on the IR and flag the callsite if the + // target name is the same as the one in the profile. const auto CTM = FS.findCallTargetMapAt(IRCallsite); const auto CallsiteFS = FS.findFunctionSamplesMapAt(IRCallsite); @@ -2181,8 +2335,36 @@ void SampleProfileMatcher::runOnFunction(const Function &F, } // Detect profile mismatch for profile staleness metrics report. - if (ReportProfileStaleness || PersistProfileStaleness) - countProfileMismatches(FS, MatchedCallsiteLocs); + if (ReportProfileStaleness || PersistProfileStaleness) { + uint64_t FuncMismatchedCallsites = 0; + uint64_t FuncProfiledCallsites = 0; + countProfileMismatches(FS, MatchedCallsiteLocs, FuncMismatchedCallsites, + FuncProfiledCallsites); + TotalProfiledCallsites += FuncProfiledCallsites; + NumMismatchedCallsites += FuncMismatchedCallsites; + LLVM_DEBUG({ + if (FunctionSamples::ProfileIsProbeBased && !IsFuncHashMismatch && + FuncMismatchedCallsites) + dbgs() << "Function checksum is matched but there are " + << FuncMismatchedCallsites << "/" << FuncProfiledCallsites + << " mismatched callsites.\n"; + }); + } + + if (IsFuncHashMismatch && SalvageStaleProfile) { + LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName() + << "\n"); + + StringMap> CalleeToCallsitesMap; + populateProfileCallsites(FS, CalleeToCallsitesMap); + + // The matching result will be saved to IRToProfileLocationMap, create a new + // map for each function. 
+ auto &IRToProfileLocationMap = getIRToProfileLocationMap(F); + + runStaleProfileMatching(IRLocations, CalleeToCallsitesMap, + IRToProfileLocationMap); + } } void SampleProfileMatcher::runOnModule() { @@ -2198,6 +2380,8 @@ void SampleProfileMatcher::runOnModule() { continue; runOnFunction(F, *FS); } + if (SalvageStaleProfile) + distributeIRToProfileLocationMap(); if (ReportProfileStaleness) { if (FunctionSamples::ProfileIsProbeBased) { @@ -2240,6 +2424,28 @@ void SampleProfileMatcher::runOnModule() { } } +void SampleProfileMatcher::distributeIRToProfileLocationMap( + FunctionSamples &FS) { + const auto ProfileMappings = FuncMappings.find(FS.getName()); + if (ProfileMappings != FuncMappings.end()) { + FS.setIRToProfileLocationMap(&(ProfileMappings->second)); + } + + for (auto &Inlinees : FS.getCallsiteSamples()) { + for (auto FS : Inlinees.second) { + distributeIRToProfileLocationMap(FS.second); + } + } +} + +// Use a central place to distribute the matching results. Outlined and inlined +// profile with the function name will be set to the same pointer. 
+void SampleProfileMatcher::distributeIRToProfileLocationMap() { + for (auto &I : Reader.getProfiles()) { + distributeIRToProfileLocationMap(I.second); + } +} + bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI, CallGraph *CG) { GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching.prof new file mode 100644 index 000000000000..4d6241bb8568 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching.prof @@ -0,0 +1,28 @@ +main:1497:0 + 1: 0 + 2: 112 + 3: 112 bar:60 dummy_calltarget:50 + 4: 116 + 5: 0 + 7: 124 bar:124 + 9: 126 bar:126 + 6: foo:452 + 1: 112 + 2: 101 + 3: 13 + 4: 112 + 5: 101 bar:109 + 6: 13 bar:14 + !CFGChecksum: 563022570642068 + 8: foo:472 + 1: 117 + 2: 104 + 3: 13 + 4: 121 + 5: 104 bar:104 + 6: 14 bar:14 + !CFGChecksum: 563022570642068 + !CFGChecksum: 1125988587804525 +bar:491:491 + 1: 491 + !CFGChecksum: 4294967295 diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll new file mode 100644 index 000000000000..54dbc1d50d41 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll @@ -0,0 +1,342 @@ +; REQUIRES: x86_64-linux +; REQUIRES: asserts +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-matching.prof --salvage-stale-profile -S --debug-only=sample-profile 2>&1 | FileCheck %s + +; The profiled source code: + +; volatile int x = 1; +; __attribute__((noinline)) int bar(int p) { +; return p; +; } + +; __attribute__((always_inline)) int foo(int i, int p) { +; if (i % 10) return bar(p); +; else return bar(p + 1); +; } + +; int main() { +; for (int i = 0; i < 1000 * 1000; i++) { +; x += foo(i, 
x); +; x += bar(x); +; x += foo(i, x); +; x += bar(x); +; } +; } + +; The source code for the current build: + +; volatile int x = 1; +; __attribute__((noinline)) int bar(int p) { +; return p; +; } + +; __attribute__((always_inline)) int foo(int i, int p) { +; if (i % 10) return bar(p); +; else return bar(p + 1); +; } + +; int main() { +; if (x == 0) // code change +; return 0; // code change +; for (int i = 0; i < 1000 * 1000; i++) { +; x += foo(i, x); +; x += bar(x); +; if (i < 0) // code change +; return 0; // code change +; x += foo(i, x); +; x += bar(x); +; } +; } + + +; CHECK: Run stale profile matching for main + +; CHECK: Location is matched from 1 to 1 +; CHECK: Location is matched from 2 to 2 +; CHECK: Location is matched from 3 to 3 +; CHECK: Location is matched from 4 to 4 +; CHECK: Location is matched from 5 to 5 +; CHECK: Location is matched from 6 to 6 +; CHECK: Location is matched from 7 to 7 +; CHECK: Location is matched from 8 to 8 +; CHECK: Location is matched from 9 to 9 +; CHECK: Location is matched from 10 to 10 +; CHECK: Location is matched from 11 to 11 + +; CHECK: Callsite with callee:foo is matched from 13 to 6 +; CHECK: Location is rematched backwards from 7 to 0 +; CHECK: Location is rematched backwards from 8 to 1 +; CHECK: Location is rematched backwards from 9 to 2 +; CHECK: Location is rematched backwards from 10 to 3 +; CHECK: Location is rematched backwards from 11 to 4 +; CHECK: Callsite with callee:bar is matched from 14 to 7 +; CHECK: Callsite with callee:foo is matched from 15 to 8 +; CHECK: Callsite with callee:bar is matched from 16 to 9 + + +; CHECK: 2: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00) +; CHECK: 3: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00) +; CHECK: 4: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 116 - factor: 
1.00) +; CHECK: 5: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 5, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00) +; CHECK: 1: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00) +; CHECK: 2: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 101 - factor: 1.00) +; CHECK: 5: %call.i3 = call i32 @bar(i32 noundef %1), !dbg ![[#]] - weight: 101 - factor: 1.00) +; CHECK: 3: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 13 - factor: 1.00) +; CHECK: 6: %call1.i6 = call i32 @bar(i32 noundef %add.i5), !dbg ![[#]] - weight: 13 - factor: 1.00) +; CHECK: 4: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00) +; CHECK: 14: %call2 = call i32 @bar(i32 noundef %3), !dbg ![[#]] - weight: 124 - factor: 1.00) +; CHECK: 8: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00) +; CHECK: 1: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 117 - factor: 1.00) +; CHECK: 2: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 104 - factor: 1.00) +; CHECK: 5: %call.i = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 104 - factor: 1.00) +; CHECK: 3: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 13 - factor: 1.00) +; CHECK: 6: %call1.i = call i32 @bar(i32 noundef %add.i), !dbg ![[#]] - weight: 14 - factor: 1.00) +; CHECK: 4: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 121 - factor: 1.00) +; CHECK: 16: %call9 = call i32 @bar(i32 noundef %7), !dbg ![[#]] - weight: 126 - factor: 1.00) +; CHECK: 9: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 9, i32 0, i64 -1), !dbg 
![[#]] - weight: 112 - factor: 1.00) +; CHECK: 10: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 10, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00) +; CHECK: 11: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 11, i32 0, i64 -1), !dbg ![[#]] - weight: 116 - factor: 1.00) +; CHECK: 1: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00) + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@x = dso_local global i32 1, align 4, !dbg !0 + +; Function Attrs: noinline nounwind uwtable +define dso_local i32 @bar(i32 noundef %p) #0 !dbg !16 { +entry: + call void @llvm.dbg.value(metadata i32 %p, metadata !20, metadata !DIExpression()), !dbg !21 + call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !22 + ret i32 %p, !dbg !23 +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: alwaysinline nounwind uwtable +define dso_local i32 @foo(i32 noundef %i, i32 noundef %p) #2 !dbg !24 { +entry: + call void @llvm.dbg.value(metadata i32 %i, metadata !28, metadata !DIExpression()), !dbg !30 + call void @llvm.dbg.value(metadata i32 %p, metadata !29, metadata !DIExpression()), !dbg !30 + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg !31 + %rem = srem i32 %i, 10, !dbg !33 + %tobool = icmp ne i32 %rem, 0, !dbg !33 + br i1 %tobool, label %if.then, label %if.else, !dbg !34 + +if.then: ; preds = %entry + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg !35 + %call = call i32 @bar(i32 noundef %p), !dbg !36 + br label %return, !dbg !38 + +if.else: ; preds = %entry + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg !39 + %add = add nsw i32 %p, 1, 
!dbg !40 + %call1 = call i32 @bar(i32 noundef %add), !dbg !41 + br label %return, !dbg !43 + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ], !dbg !44 + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg !45 + ret i32 %retval.0, !dbg !45 +} + +; Function Attrs: nounwind uwtable +define dso_local i32 @main() #3 !dbg !46 { +entry: + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !52 + %0 = load volatile i32, ptr @x, align 4, !dbg !52, !tbaa !54 + %cmp = icmp eq i32 %0, 0, !dbg !58 + br i1 %cmp, label %if.then, label %if.end, !dbg !59 + +if.then: ; preds = %entry + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !60 + br label %for.end, !dbg !60 + +if.end: ; preds = %entry + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !61 + call void @llvm.dbg.value(metadata i32 0, metadata !50, metadata !DIExpression()), !dbg !62 + br label %for.cond, !dbg !63 + +for.cond: ; preds = %if.end6, %if.end + %i.0 = phi i32 [ 0, %if.end ], [ %inc, %if.end6 ], !dbg !64 + call void @llvm.dbg.value(metadata i32 %i.0, metadata !50, metadata !DIExpression()), !dbg !62 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !65 + %cmp1 = icmp slt i32 %i.0, 1000000, !dbg !67 + br i1 %cmp1, label %for.body, label %for.cond.cleanup, !dbg !68 + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 5, i32 0, i64 -1), !dbg !68 + br label %cleanup, !dbg !68 + +for.body: ; preds = %for.cond + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !69 + %1 = load volatile i32, ptr @x, align 4, !dbg !71, !tbaa !54 + %call = call i32 @foo(i32 noundef %i.0, i32 noundef %1), !dbg !72 + %2 = load volatile i32, ptr @x, align 4, !dbg !74, !tbaa !54 + %add = add nsw i32 %2, %call, !dbg !74 + store volatile i32 %add, ptr @x, 
align 4, !dbg !74, !tbaa !54 + %3 = load volatile i32, ptr @x, align 4, !dbg !75, !tbaa !54 + %call2 = call i32 @bar(i32 noundef %3), !dbg !76 + %4 = load volatile i32, ptr @x, align 4, !dbg !78, !tbaa !54 + %add3 = add nsw i32 %4, %call2, !dbg !78 + store volatile i32 %add3, ptr @x, align 4, !dbg !78, !tbaa !54 + br i1 false, label %if.then5, label %if.end6, !dbg !79 + +if.then5: ; preds = %for.body + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !80 + br label %cleanup, !dbg !80 + +if.end6: ; preds = %for.body + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg !82 + %5 = load volatile i32, ptr @x, align 4, !dbg !83, !tbaa !54 + %call7 = call i32 @foo(i32 noundef %i.0, i32 noundef %5), !dbg !84 + %6 = load volatile i32, ptr @x, align 4, !dbg !86, !tbaa !54 + %add8 = add nsw i32 %6, %call7, !dbg !86 + store volatile i32 %add8, ptr @x, align 4, !dbg !86, !tbaa !54 + %7 = load volatile i32, ptr @x, align 4, !dbg !87, !tbaa !54 + %call9 = call i32 @bar(i32 noundef %7), !dbg !88 + %8 = load volatile i32, ptr @x, align 4, !dbg !90, !tbaa !54 + %add10 = add nsw i32 %8, %call9, !dbg !90 + store volatile i32 %add10, ptr @x, align 4, !dbg !90, !tbaa !54 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 9, i32 0, i64 -1), !dbg !91 + %inc = add nsw i32 %i.0, 1, !dbg !91 + call void @llvm.dbg.value(metadata i32 %inc, metadata !50, metadata !DIExpression()), !dbg !62 + br label %for.cond, !dbg !92, !llvm.loop !93 + +cleanup: ; preds = %if.then5, %for.cond.cleanup + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 10, i32 0, i64 -1), !dbg !96 + br label %for.end + +for.end: ; preds = %cleanup, %if.then + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 11, i32 0, i64 -1), !dbg !97 + ret i32 0, !dbg !97 +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #4 + +; 
Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #5 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.value(metadata, metadata, metadata) #6 + +attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #2 = { alwaysinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #3 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #4 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #5 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #6 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9, !10, !11} +!llvm.ident = !{!12} +!llvm.pseudo_probe_desc = !{!13, !14, !15} + +!0 
= !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 17.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "test.c", directory: "path") +!4 = !{!0} +!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6) +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 7, !"Dwarf Version", i32 5} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{i32 7, !"uwtable", i32 2} +!11 = !{i32 7, !"debug-info-assignment-tracking", i1 true} +!12 = !{!"clang version 17.0.0"} +!13 = !{i64 -2012135647395072713, i64 4294967295, !"bar"} +!14 = !{i64 6699318081062747564, i64 563022570642068, !"foo"} +!15 = !{i64 -2624081020897602054, i64 1126158552146340, !"main"} +!16 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 2, type: !17, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !19) +!17 = !DISubroutineType(types: !18) +!18 = !{!6, !6} +!19 = !{!20} +!20 = !DILocalVariable(name: "p", arg: 1, scope: !16, file: !3, line: 2, type: !6) +!21 = !DILocation(line: 0, scope: !16) +!22 = !DILocation(line: 3, column: 10, scope: !16) +!23 = !DILocation(line: 3, column: 3, scope: !16) +!24 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 6, type: !25, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !27) +!25 = !DISubroutineType(types: !26) +!26 = !{!6, !6, !6} +!27 = !{!28, !29} +!28 = !DILocalVariable(name: "i", arg: 1, scope: !24, file: !3, line: 6, type: !6) +!29 = !DILocalVariable(name: "p", arg: 2, scope: 
!24, file: !3, line: 6, type: !6) +!30 = !DILocation(line: 0, scope: !24) +!31 = !DILocation(line: 7, column: 6, scope: !32) +!32 = distinct !DILexicalBlock(scope: !24, file: !3, line: 7, column: 6) +!33 = !DILocation(line: 7, column: 8, scope: !32) +!34 = !DILocation(line: 7, column: 6, scope: !24) +!35 = !DILocation(line: 7, column: 26, scope: !32) +!36 = !DILocation(line: 7, column: 22, scope: !37) +!37 = !DILexicalBlockFile(scope: !32, file: !3, discriminator: 186646575) +!38 = !DILocation(line: 7, column: 14, scope: !32) +!39 = !DILocation(line: 8, column: 19, scope: !32) +!40 = !DILocation(line: 8, column: 21, scope: !32) +!41 = !DILocation(line: 8, column: 15, scope: !42) +!42 = !DILexicalBlockFile(scope: !32, file: !3, discriminator: 186646583) +!43 = !DILocation(line: 8, column: 8, scope: !32) +!44 = !DILocation(line: 0, scope: !32) +!45 = !DILocation(line: 9, column: 1, scope: !24) +!46 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !47, scopeLine: 11, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !49) +!47 = !DISubroutineType(types: !48) +!48 = !{!6} +!49 = !{!50} +!50 = !DILocalVariable(name: "i", scope: !51, file: !3, line: 14, type: !6) +!51 = distinct !DILexicalBlock(scope: !46, file: !3, line: 14, column: 3) +!52 = !DILocation(line: 12, column: 6, scope: !53) +!53 = distinct !DILexicalBlock(scope: !46, file: !3, line: 12, column: 6) +!54 = !{!55, !55, i64 0} +!55 = !{!"int", !56, i64 0} +!56 = !{!"omnipotent char", !57, i64 0} +!57 = !{!"Simple C/C++ TBAA"} +!58 = !DILocation(line: 12, column: 8, scope: !53) +!59 = !DILocation(line: 12, column: 6, scope: !46) +!60 = !DILocation(line: 13, column: 5, scope: !53) +!61 = !DILocation(line: 14, column: 11, scope: !51) +!62 = !DILocation(line: 0, scope: !51) +!63 = !DILocation(line: 14, column: 7, scope: !51) +!64 = !DILocation(line: 14, scope: !51) +!65 = !DILocation(line: 14, column: 18, scope: !66) +!66 = distinct 
!DILexicalBlock(scope: !51, file: !3, line: 14, column: 3) +!67 = !DILocation(line: 14, column: 20, scope: !66) +!68 = !DILocation(line: 14, column: 3, scope: !51) +!69 = !DILocation(line: 15, column: 15, scope: !70) +!70 = distinct !DILexicalBlock(scope: !66, file: !3, line: 14, column: 40) +!71 = !DILocation(line: 15, column: 18, scope: !70) +!72 = !DILocation(line: 15, column: 11, scope: !73) +!73 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646639) +!74 = !DILocation(line: 15, column: 8, scope: !70) +!75 = !DILocation(line: 16, column: 15, scope: !70) +!76 = !DILocation(line: 16, column: 11, scope: !77) +!77 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646647) +!78 = !DILocation(line: 16, column: 8, scope: !70) +!79 = !DILocation(line: 17, column: 9, scope: !70) +!80 = !DILocation(line: 18, column: 8, scope: !81) +!81 = distinct !DILexicalBlock(scope: !70, file: !3, line: 17, column: 9) +!82 = !DILocation(line: 19, column: 15, scope: !70) +!83 = !DILocation(line: 19, column: 18, scope: !70) +!84 = !DILocation(line: 19, column: 11, scope: !85) +!85 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646655) +!86 = !DILocation(line: 19, column: 8, scope: !70) +!87 = !DILocation(line: 20, column: 15, scope: !70) +!88 = !DILocation(line: 20, column: 11, scope: !89) +!89 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646663) +!90 = !DILocation(line: 20, column: 8, scope: !70) +!91 = !DILocation(line: 14, column: 36, scope: !66) +!92 = !DILocation(line: 14, column: 3, scope: !66) +!93 = distinct !{!93, !68, !94, !95} +!94 = !DILocation(line: 21, column: 3, scope: !51) +!95 = !{!"llvm.loop.mustprogress"} +!96 = !DILocation(line: 0, scope: !46) +!97 = !DILocation(line: 22, column: 1, scope: !46) -- Gitee From 523cdc82664a4cf03377e05d14dace1acc39b1d9 Mon Sep 17 00:00:00 2001 From: Mark Santaniello Date: Thu, 25 May 2023 08:11:40 -0700 Subject: [PATCH 06/13] Avoid pointless canonicalize when using Dwarf names CPU 
profile indicated memcmp was hot due to the two rfind calls in getCanonicalFnName. If UseSymbolTable is false, we can avoid the cost entirely. For CSSPGO profiles I've measured ~5% speedup with this change. Profile similarity before/after matches 100%. Reviewed By: wenlei Differential Revision: https://reviews.llvm.org/D151441 --- llvm/tools/llvm-profgen/ProfiledBinary.cpp | 11 ++++++----- llvm/tools/llvm-profgen/ProfiledBinary.h | 10 ++++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 00e9d5025876..f18c62d0dcd0 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -163,12 +163,13 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway( } ProfiledBinary::ProfiledBinary(const StringRef ExeBinPath, - const StringRef DebugBinPath) - : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), ProEpilogTracker(this), + const StringRef DebugBinPath) + : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), + SymbolizerOpts(getSymbolizerOpts()), ProEpilogTracker(this), + Symbolizer(std::make_unique(SymbolizerOpts)), TrackFuncContextSize(EnableCSPreInliner && UseContextCostForPreInliner) { // Point to executable binary if debug info binary is not specified. SymbolizerPath = DebugBinPath.empty() ? 
ExeBinPath : DebugBinPath; - setupSymbolizer(); if (InferMissingFrames) MissingContextInferrer = std::make_unique(this); load(); @@ -840,7 +841,7 @@ void ProfiledBinary::populateSymbolListFromDWARF( SymbolList.add(I.second.getFuncName()); } -void ProfiledBinary::setupSymbolizer() { +symbolize::LLVMSymbolizer::Options ProfiledBinary::getSymbolizerOpts() const { symbolize::LLVMSymbolizer::Options SymbolizerOpts; SymbolizerOpts.PrintFunctions = DILineInfoSpecifier::FunctionNameKind::LinkageName; @@ -849,7 +850,7 @@ void ProfiledBinary::setupSymbolizer() { SymbolizerOpts.UseSymbolTable = false; SymbolizerOpts.RelativeAddresses = false; SymbolizerOpts.DWPName = DWPPath; - Symbolizer = std::make_unique(SymbolizerOpts); + return SymbolizerOpts; } SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP, diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index cdbaec740b4a..d44a77a8b6a3 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -188,10 +188,12 @@ class ProfiledBinary { std::string Path; // Path of the debug info binary. std::string DebugBinaryPath; - // Path of symbolizer path which should be pointed to binary with debug info. - StringRef SymbolizerPath; // The target triple. Triple TheTriple; + // Path of symbolizer path which should be pointed to binary with debug info. + StringRef SymbolizerPath; + // Options used to configure the symbolizer + symbolize::LLVMSymbolizer::Options SymbolizerOpts; // The runtime base address that the first executable segment is loaded at. uint64_t BaseAddress = 0; // The runtime base address that the first loadabe segment is loaded at. @@ -302,7 +304,7 @@ class ProfiledBinary { // Set up disassembler and related components. 
void setUpDisassembler(const ELFObjectFileBase *Obj); - void setupSymbolizer(); + symbolize::LLVMSymbolizer::Options getSymbolizerOpts() const; // Load debug info of subprograms from DWARF section. void loadSymbolsFromDWARF(ObjectFile &Obj); @@ -493,7 +495,7 @@ public: SampleContextFrameVector getFrameLocationStack(uint64_t Address, bool UseProbeDiscriminator = false) { InstructionPointer IP(this, Address); - return symbolize(IP, true, UseProbeDiscriminator); + return symbolize(IP, SymbolizerOpts.UseSymbolTable, UseProbeDiscriminator); } const SampleContextFrameVector & -- Gitee From b4a26e0750b5eb3063383e197bcba0996bd75bab Mon Sep 17 00:00:00 2001 From: spupyrev Date: Fri, 12 May 2023 10:27:12 -0700 Subject: [PATCH 07/13] profilie inference changes for stale profile matching This diff facilitates a new stale profile matching in BOLT: D144500 This is a no-op for existing usages of profi (CSSPGO). Reviewed By: hoy Differential Revision: https://reviews.llvm.org/D150466 --- .../Utils/SampleProfileInference.cpp | 36 +++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp index 691ee00bd831..31d62fbf0618 100644 --- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp +++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp @@ -20,6 +20,7 @@ #include #include #include +#include using namespace llvm; #define DEBUG_TYPE "sample-profile-inference" @@ -1218,10 +1219,23 @@ void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network, #ifndef NDEBUG /// Verify that the provided block/jump weights are as expected. 
void verifyInput(const FlowFunction &Func) { - // Verify the entry block + // Verify entry and exit blocks assert(Func.Entry == 0 && Func.Blocks[0].isEntry()); + size_t NumExitBlocks = 0; for (size_t I = 1; I < Func.Blocks.size(); I++) { assert(!Func.Blocks[I].isEntry() && "multiple entry blocks"); + if (Func.Blocks[I].isExit()) + NumExitBlocks++; + } + assert(NumExitBlocks > 0 && "cannot find exit blocks"); + + // Verify that there are no parallel edges + for (auto &Block : Func.Blocks) { + std::unordered_set UniqueSuccs; + for (auto &Jump : Block.SuccJumps) { + auto It = UniqueSuccs.insert(Jump->Target); + assert(It.second && "input CFG contains parallel edges"); + } } // Verify CFG jumps for (auto &Block : Func.Blocks) { @@ -1304,8 +1318,26 @@ void verifyOutput(const FlowFunction &Func) { } // end of anonymous namespace -/// Apply the profile inference algorithm for a given function +/// Apply the profile inference algorithm for a given function and provided +/// profi options void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) { + // Check if the function has samples and assign initial flow values + bool HasSamples = false; + for (FlowBlock &Block : Func.Blocks) { + if (Block.Weight > 0) + HasSamples = true; + Block.Flow = Block.Weight; + } + for (FlowJump &Jump : Func.Jumps) { + if (Jump.Weight > 0) + HasSamples = true; + Jump.Flow = Jump.Weight; + } + + // Quit early for functions with a single block or ones w/o samples + if (Func.Blocks.size() <= 1 || !HasSamples) + return; + #ifndef NDEBUG // Verify the input data verifyInput(Func); -- Gitee From 7cfa6cd08fc7017a8b0c0bb56eee9fa8beaad6ec Mon Sep 17 00:00:00 2001 From: Wenlei He Date: Sun, 25 Jun 2023 16:39:16 -0700 Subject: [PATCH 08/13] [NFC] Generalize llvm-profgen message to cover both AutoFDO and CSSPGO Update llvm-profgen profile density message to cover both AutoFDO and CSSPGO. 
Differential Revision: https://reviews.llvm.org/D153730 --- llvm/test/tools/llvm-profgen/profile-density.test | 2 +- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/tools/llvm-profgen/profile-density.test b/llvm/test/tools/llvm-profgen/profile-density.test index 1d7c2f9cd5df..0eb83838d16e 100644 --- a/llvm/test/tools/llvm-profgen/profile-density.test +++ b/llvm/test/tools/llvm-profgen/profile-density.test @@ -4,7 +4,7 @@ ; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density-cs.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t3 --show-density -hot-function-density-threshold=1 &> %t4 ; RUN: FileCheck %s --input-file %t4 --check-prefix=CHECK-DENSITY-CS -;CHECK-DENSITY: AutoFDO is estimated to optimize better with 3.1x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples. +;CHECK-DENSITY: Sample PGO is estimated to optimize better with 3.1x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples. ;CHECK-DENSITY: Minimum profile density for hot functions with top 99.00% total samples: 3.2 ;CHECK-DENSITY-CS: Minimum profile density for hot functions with top 99.00% total samples: 128.3 diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 2728f80da64b..6cb6b1e8f713 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -188,7 +188,7 @@ void ProfileGeneratorBase::showDensitySuggestion(double Density) { "set too low. Please check your command.\n"; else if (Density < HotFunctionDensityThreshold) WithColor::warning() - << "AutoFDO is estimated to optimize better with " + << "Sample PGO is estimated to optimize better with " << format("%.1f", HotFunctionDensityThreshold / Density) << "x more samples. 
Please consider increasing sampling rate or " "profiling for longer duration to get more samples.\n"; -- Gitee From e6db4c72a3716777e7f73e9472d1a30eff45191e Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Mon, 26 Jun 2023 11:50:30 -0700 Subject: [PATCH 09/13] [CSSPGO][Preinliner] Bump up the threshold to favor previous compiler inline decision. The compiler has more insight and knowledge about functions based on their IR and attributes and should make a better inline decision than the offline preinliner does which is purely based on callsites hotness and code size. Therefore I'm making changes to favor previous compiler inline decision by bumping up the callsite allowance. This should improve the performance by more than 1% according to testing on Meta services. Reviewed By: wenlei Differential Revision: https://reviews.llvm.org/D153797 --- llvm/tools/llvm-profgen/CSPreInliner.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp index d10cf4c895b2..330a8f33de21 100644 --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp @@ -55,6 +55,12 @@ static cl::opt SamplePreInlineReplay( cl::desc( "Replay previous inlining and adjust context profile accordingly")); +static cl::opt CSPreinlMultiplierForPrevInl( + "csspgo-preinliner-multiplier-for-previous-inlining", cl::Hidden, + cl::init(100), + cl::desc( + "Multiplier to bump up callsite threshold for previous inlining.")); + CSPreInliner::CSPreInliner(SampleContextTracker &Tracker, ProfiledBinary &Binary, ProfileSummary *Summary) : UseContextCost(UseContextCostForPreInliner), @@ -151,11 +157,12 @@ uint32_t CSPreInliner::getFuncSize(const ContextTrieNode *ContextNode) { } bool CSPreInliner::shouldInline(ProfiledInlineCandidate &Candidate) { + bool WasInlined = + Candidate.CalleeSamples->getContext().hasAttribute(ContextWasInlined); // If replay inline is requested, 
simply follow the inline decision of the // profiled binary. if (SamplePreInlineReplay) - return Candidate.CalleeSamples->getContext().hasAttribute( - ContextWasInlined); + return WasInlined; unsigned int SampleThreshold = SampleColdCallSiteThreshold; uint64_t ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( @@ -182,6 +189,12 @@ bool CSPreInliner::shouldInline(ProfiledInlineCandidate &Candidate) { // want any inlining for cold callsites. SampleThreshold = SampleHotCallSiteThreshold * NormalizedHotness * 100 + SampleColdCallSiteThreshold + 1; + // Bump up the threshold to favor previous compiler inline decision. The + // compiler has more insight and knowledge about functions based on their IR + // and attributes and should be able to make a more reasonable inline + // decision. + if (WasInlined) + SampleThreshold *= CSPreinlMultiplierForPrevInl; } return (Candidate.SizeCost < SampleThreshold); -- Gitee From c6d88cd7a8b5cb8026f101d8588e85687aaeda98 Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Tue, 27 Jun 2023 16:31:50 -0700 Subject: [PATCH 10/13] [CSSPGO][Preinliner] Always inline zero-sized functions. Zero-sized functions should be cost-free in terms of size budget, so they should be considered during inlining even if we run out of size budget. This appears to give 0.5% win for one of our internal services. 
Reviewed By: wenlei Differential Revision: https://reviews.llvm.org/D153820 --- llvm/tools/llvm-profgen/CSPreInliner.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/llvm/tools/llvm-profgen/CSPreInliner.h b/llvm/tools/llvm-profgen/CSPreInliner.h index 09dd2dec1149..4d848aafdab9 100644 --- a/llvm/tools/llvm-profgen/CSPreInliner.h +++ b/llvm/tools/llvm-profgen/CSPreInliner.h @@ -41,6 +41,13 @@ struct ProfiledInlineCandidate { struct ProfiledCandidateComparer { bool operator()(const ProfiledInlineCandidate &LHS, const ProfiledInlineCandidate &RHS) { + // Always prioritize inlining zero-sized functions as they do not affect the + // size budget. This could happen when all of the callee's code is gone and + // only pseudo probes are left. + if ((LHS.SizeCost == 0 || RHS.SizeCost == 0) && + (LHS.SizeCost != RHS.SizeCost)) + return RHS.SizeCost == 0; + if (LHS.CallsiteCount != RHS.CallsiteCount) return LHS.CallsiteCount < RHS.CallsiteCount; -- Gitee From bea3f73409766c4e78b22d36fbbdf3286bd87467 Mon Sep 17 00:00:00 2001 From: wlei Date: Wed, 28 Jun 2023 17:21:40 -0700 Subject: [PATCH 11/13] [CSSPGO] Enable stale profile matching by default for CSSPGO We tested the stale profile matching on several Meta's internal services, all results are positive, for instance, in one service that refreshed its profile every one or two weeks, it consistently gave 1~2% performance improvement. We also observed an instance that a trivial refactoring caused a 2% regression and the matching can successfully recover the whole regression. Therefore, we'd like to turn it on by default for CSSPGO. 
Reviewed By: hoy, wenlei Differential Revision: https://reviews.llvm.org/D154027 --- llvm/lib/Transforms/IPO/SampleProfile.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 66d7ee54d89f..2c99290fe858 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -2066,6 +2066,16 @@ bool SampleProfileLoader::doInitialization(Module &M, UsePreInlinerDecision = true; } + // Enable stale profile matching by default for probe-based profile. + // Currently the matching relies on if the checksum mismatch is detected, + // which is currently only available for pseudo-probe mode. Removing the + // checksum check could cause regressions for some cases, so further tuning + // might be needed if we want to enable it for all cases. + if (Reader->profileIsProbeBased() && + !SalvageStaleProfile.getNumOccurrences()) { + SalvageStaleProfile = true; + } + if (!Reader->profileIsCS()) { // Non-CS profile should be fine without a function size budget for the // inliner since the contexts in the profile are either all from inlining -- Gitee From ff0527a3019c7c8e951a67e3279da69dedf2b000 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Wed, 1 Feb 2023 09:24:44 -0800 Subject: [PATCH 12/13] [NFC][Profile] Access profile through VirtualFileSystem Make the access to profile data going through virtual file system so the inputs can be remapped. In the context of the caching, it can make sure we capture the inputs and provided an immutable input as profile data. 
Reviewed By: akyrtzi, benlangmuir Differential Revision: https://reviews.llvm.org/D139052 --- clang/include/clang/CodeGen/BackendUtil.h | 5 ++ clang/lib/CodeGen/BackendUtil.cpp | 55 ++++++++++--------- clang/lib/CodeGen/CodeGenAction.cpp | 23 ++++---- clang/lib/CodeGen/CodeGenModule.cpp | 13 +++-- .../ObjectFilePCHContainerOperations.cpp | 4 +- clang/lib/Frontend/CompilerInvocation.cpp | 21 +++++-- llvm/include/llvm/CodeGen/MIRSampleProfile.h | 8 ++- llvm/include/llvm/CodeGen/Passes.h | 12 +++- llvm/include/llvm/Passes/PassBuilder.h | 10 +++- .../ProfileData/Coverage/CoverageMapping.h | 7 ++- .../llvm/ProfileData/InstrProfReader.h | 10 +++- .../llvm/ProfileData/SampleProfReader.h | 12 ++-- llvm/include/llvm/Support/PGOOptions.h | 42 +++++--------- .../llvm/Transforms/IPO/SampleProfile.h | 11 +++- .../Instrumentation/PGOInstrumentation.h | 9 ++- .../Utils/SampleProfileLoaderBaseImpl.h | 13 ++++- llvm/lib/CodeGen/MIRSampleProfile.cpp | 35 +++++++----- llvm/lib/CodeGen/TargetPassConfig.cpp | 13 +++-- llvm/lib/LTO/LTOBackend.cpp | 10 ++-- llvm/lib/Passes/PassBuilderPipelines.cpp | 31 ++++++----- .../ProfileData/Coverage/CoverageMapping.cpp | 7 ++- llvm/lib/ProfileData/InstrProf.cpp | 6 +- llvm/lib/ProfileData/InstrProfReader.cpp | 18 +++--- llvm/lib/ProfileData/SampleProfReader.cpp | 21 ++++--- llvm/lib/Support/CMakeLists.txt | 1 + llvm/lib/Support/PGOOptions.cpp | 52 ++++++++++++++++++ llvm/lib/Target/X86/X86InsertPrefetch.cpp | 5 +- llvm/lib/Transforms/IPO/SampleProfile.cpp | 17 +++++- .../Instrumentation/PGOInstrumentation.cpp | 19 ++++--- llvm/tools/llvm-cov/CodeCoverage.cpp | 3 +- llvm/tools/llvm-profdata/llvm-profdata.cpp | 26 ++++++--- llvm/tools/llvm-profgen/llvm-profgen.cpp | 5 +- llvm/tools/opt/NewPMDriver.cpp | 16 ++++-- llvm/unittests/ProfileData/SampleProfTest.cpp | 4 +- 34 files changed, 359 insertions(+), 185 deletions(-) create mode 100644 llvm/lib/Support/PGOOptions.cpp diff --git a/clang/include/clang/CodeGen/BackendUtil.h 
b/clang/include/clang/CodeGen/BackendUtil.h index d97af65a3d01..cdbfe4ca5e65 100644 --- a/clang/include/clang/CodeGen/BackendUtil.h +++ b/clang/include/clang/CodeGen/BackendUtil.h @@ -16,8 +16,12 @@ namespace llvm { class BitcodeModule; template class Expected; + template class IntrusiveRefCntPtr; class Module; class MemoryBufferRef; + namespace vfs { + class FileSystem; + } // namespace vfs } namespace clang { @@ -40,6 +44,7 @@ namespace clang { const CodeGenOptions &CGOpts, const TargetOptions &TOpts, const LangOptions &LOpts, StringRef TDesc, llvm::Module *M, BackendAction Action, + llvm::IntrusiveRefCntPtr VFS, std::unique_ptr OS); void EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 10d6bff25e6d..2b43d0e38bf7 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -52,6 +52,7 @@ #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/Timer.h" #include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -123,6 +124,7 @@ class EmitAssemblyHelper { const clang::TargetOptions &TargetOpts; const LangOptions &LangOpts; Module *TheModule; + IntrusiveRefCntPtr VFS; Timer CodeGenerationTime; @@ -187,9 +189,10 @@ public: const HeaderSearchOptions &HeaderSearchOpts, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, - const LangOptions &LOpts, Module *M) + const LangOptions &LOpts, Module *M, + IntrusiveRefCntPtr VFS) : Diags(_Diags), HSOpts(HeaderSearchOpts), CodeGenOpts(CGOpts), - TargetOpts(TOpts), LangOpts(LOpts), TheModule(M), + TargetOpts(TOpts), LangOpts(LOpts), TheModule(M), VFS(std::move(VFS)), CodeGenerationTime("codegen", "Code Generation Time"), TargetTriple(TheModule->getTargetTriple()) {} @@ -767,32 +770,33 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if 
(CodeGenOpts.hasProfileIRInstr()) // -fprofile-generate. - PGOOpt = PGOOptions(CodeGenOpts.InstrProfileOutput.empty() - ? getDefaultProfileGenName() - : CodeGenOpts.InstrProfileOutput, - "", "", PGOOptions::IRInstr, PGOOptions::NoCSAction, - CodeGenOpts.DebugInfoForProfiling); + PGOOpt = PGOOptions( + CodeGenOpts.InstrProfileOutput.empty() ? getDefaultProfileGenName() + : CodeGenOpts.InstrProfileOutput, + "", "", nullptr, PGOOptions::IRInstr, PGOOptions::NoCSAction, + CodeGenOpts.DebugInfoForProfiling); else if (CodeGenOpts.hasProfileIRUse()) { // -fprofile-use. auto CSAction = CodeGenOpts.hasProfileCSIRUse() ? PGOOptions::CSIRUse : PGOOptions::NoCSAction; - PGOOpt = PGOOptions(CodeGenOpts.ProfileInstrumentUsePath, "", - CodeGenOpts.ProfileRemappingFile, PGOOptions::IRUse, - CSAction, CodeGenOpts.DebugInfoForProfiling); + PGOOpt = + PGOOptions(CodeGenOpts.ProfileInstrumentUsePath, "", + CodeGenOpts.ProfileRemappingFile, VFS, PGOOptions::IRUse, + CSAction, CodeGenOpts.DebugInfoForProfiling); } else if (!CodeGenOpts.SampleProfileFile.empty()) // -fprofile-sample-use PGOOpt = PGOOptions( CodeGenOpts.SampleProfileFile, "", CodeGenOpts.ProfileRemappingFile, - PGOOptions::SampleUse, PGOOptions::NoCSAction, + VFS, PGOOptions::SampleUse, PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling, CodeGenOpts.PseudoProbeForProfiling); else if (CodeGenOpts.PseudoProbeForProfiling) // -fpseudo-probe-for-profiling - PGOOpt = - PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction, - CodeGenOpts.DebugInfoForProfiling, true); + PGOOpt = PGOOptions("", "", "", nullptr, PGOOptions::NoAction, + PGOOptions::NoCSAction, + CodeGenOpts.DebugInfoForProfiling, true); else if (CodeGenOpts.DebugInfoForProfiling) // -fdebug-info-for-profiling - PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction, + PGOOpt = PGOOptions("", "", "", nullptr, PGOOptions::NoAction, PGOOptions::NoCSAction, true); // Check to see if we want to generate a CS profile. 
@@ -810,12 +814,13 @@ void EmitAssemblyHelper::RunOptimizationPipeline( : CodeGenOpts.InstrProfileOutput; PGOOpt->CSAction = PGOOptions::CSIRInstr; } else - PGOOpt = PGOOptions("", - CodeGenOpts.InstrProfileOutput.empty() - ? getDefaultProfileGenName() - : CodeGenOpts.InstrProfileOutput, - "", PGOOptions::NoAction, PGOOptions::CSIRInstr, - CodeGenOpts.DebugInfoForProfiling); + PGOOpt = + PGOOptions("", + CodeGenOpts.InstrProfileOutput.empty() + ? getDefaultProfileGenName() + : CodeGenOpts.InstrProfileOutput, + "", nullptr, PGOOptions::NoAction, PGOOptions::CSIRInstr, + CodeGenOpts.DebugInfoForProfiling); } if (TM) TM->setPGOOption(PGOOpt); @@ -1219,9 +1224,9 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, const HeaderSearchOptions &HeaderOpts, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, - const LangOptions &LOpts, - StringRef TDesc, Module *M, - BackendAction Action, + const LangOptions &LOpts, StringRef TDesc, + Module *M, BackendAction Action, + IntrusiveRefCntPtr VFS, std::unique_ptr OS) { llvm::TimeTraceScope TimeScope("Backend"); @@ -1264,7 +1269,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, } } - EmitAssemblyHelper AsmHelper(Diags, HeaderOpts, CGOpts, TOpts, LOpts, M); + EmitAssemblyHelper AsmHelper(Diags, HeaderOpts, CGOpts, TOpts, LOpts, M, VFS); AsmHelper.EmitAssembly(Action, std::move(OS)); // Verify clang's TargetInfo DataLayout against the LLVM TargetMachine's diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 2b219267869e..1d6922176b35 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -115,6 +115,7 @@ namespace clang { const LangOptions &LangOpts; std::unique_ptr AsmOutStream; ASTContext *Context; + IntrusiveRefCntPtr FS; Timer LLVMIRGeneration; unsigned LLVMIRGenerationRefCount; @@ -147,7 +148,7 @@ namespace clang { public: BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, - IntrusiveRefCntPtr FS, + 
IntrusiveRefCntPtr VFS, const HeaderSearchOptions &HeaderSearchOpts, const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, @@ -158,10 +159,10 @@ namespace clang { CoverageSourceInfo *CoverageInfo = nullptr) : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), - AsmOutStream(std::move(OS)), Context(nullptr), + AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS), LLVMIRGeneration("irgen", "LLVM IR Generation Time"), LLVMIRGenerationRefCount(0), - Gen(CreateLLVMCodeGen(Diags, InFile, std::move(FS), HeaderSearchOpts, + Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts, PPOpts, CodeGenOpts, C, CoverageInfo)), LinkModules(std::move(LinkModules)) { TimerIsEnabled = CodeGenOpts.TimePasses; @@ -173,7 +174,7 @@ namespace clang { // to use the clang diagnostic handler for IR input files. It avoids // initializing the OS field. BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, - IntrusiveRefCntPtr FS, + IntrusiveRefCntPtr VFS, const HeaderSearchOptions &HeaderSearchOpts, const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, @@ -183,10 +184,10 @@ namespace clang { CoverageSourceInfo *CoverageInfo = nullptr) : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), - Context(nullptr), + Context(nullptr), FS(VFS), LLVMIRGeneration("irgen", "LLVM IR Generation Time"), LLVMIRGenerationRefCount(0), - Gen(CreateLLVMCodeGen(Diags, "", std::move(FS), HeaderSearchOpts, + Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts, PPOpts, CodeGenOpts, C, CoverageInfo)), LinkModules(std::move(LinkModules)), CurLinkModule(Module) { TimerIsEnabled = CodeGenOpts.TimePasses; @@ -381,7 +382,7 @@ namespace clang { EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts, C.getTargetInfo().getDataLayoutString(), - getModule(), Action, 
std::move(AsmOutStream)); + getModule(), Action, FS, std::move(AsmOutStream)); Ctx.setDiagnosticHandler(std::move(OldDiagnosticHandler)); @@ -1238,10 +1239,10 @@ void CodeGenAction::ExecuteAction() { std::unique_ptr OptRecordFile = std::move(*OptRecordFileOrErr); - EmitBackendOutput(Diagnostics, CI.getHeaderSearchOpts(), CodeGenOpts, - TargetOpts, CI.getLangOpts(), - CI.getTarget().getDataLayoutString(), TheModule.get(), BA, - std::move(OS)); + EmitBackendOutput( + Diagnostics, CI.getHeaderSearchOpts(), CodeGenOpts, TargetOpts, + CI.getLangOpts(), CI.getTarget().getDataLayoutString(), TheModule.get(), + BA, CI.getFileManager().getVirtualFileSystemPtr(), std::move(OS)); if (OptRecordFile) OptRecordFile->keep(); } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 12d602fed693..d5311806eace 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -107,11 +107,11 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO, llvm::Module &M, DiagnosticsEngine &diags, CoverageSourceInfo *CoverageInfo) - : Context(C), LangOpts(C.getLangOpts()), FS(std::move(FS)), - HeaderSearchOpts(HSO), PreprocessorOpts(PPO), CodeGenOpts(CGO), - TheModule(M), Diags(diags), Target(C.getTargetInfo()), - ABI(createCXXABI(*this)), VMContext(M.getContext()), Types(*this), - VTables(*this), SanitizerMD(new SanitizerMetadata(*this)) { + : Context(C), LangOpts(C.getLangOpts()), FS(FS), HeaderSearchOpts(HSO), + PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags), + Target(C.getTargetInfo()), ABI(createCXXABI(*this)), + VMContext(M.getContext()), Types(*this), VTables(*this), + SanitizerMD(new SanitizerMetadata(*this)) { // Initialize the type cache. 
llvm::LLVMContext &LLVMContext = M.getContext(); @@ -185,7 +185,8 @@ CodeGenModule::CodeGenModule(ASTContext &C, if (CodeGenOpts.hasProfileClangUse()) { auto ReaderOrErr = llvm::IndexedInstrProfReader::create( - CodeGenOpts.ProfileInstrumentUsePath, CodeGenOpts.ProfileRemappingFile); + CodeGenOpts.ProfileInstrumentUsePath, *FS, + CodeGenOpts.ProfileRemappingFile); // We're checking for profile read errors in CompilerInvocation, so if // there was an error it should've already been caught. If it hasn't been // somehow, trip an assertion. diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index 677b66d3e1dc..2f2126eff527 100644 --- a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -320,7 +320,7 @@ public: clang::EmitBackendOutput( Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts, Ctx.getTargetInfo().getDataLayoutString(), M.get(), - BackendAction::Backend_EmitLL, + BackendAction::Backend_EmitLL, FS, std::make_unique(Buffer)); llvm::dbgs() << Buffer; }); @@ -329,7 +329,7 @@ public: clang::EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts, Ctx.getTargetInfo().getDataLayoutString(), M.get(), - BackendAction::Backend_EmitObj, std::move(OS)); + BackendAction::Backend_EmitObj, FS, std::move(OS)); // Free the memory for the temporary buffer. llvm::SmallVector Empty; diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 0bb9c8c83c63..ed483d22550b 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1304,8 +1304,9 @@ static std::string serializeXRayInstrumentationBundle(const XRayInstrSet &S) { // Set the profile kind using fprofile-instrument-use-path. 
static void setPGOUseInstrumentor(CodeGenOptions &Opts, const Twine &ProfileName, + llvm::vfs::FileSystem &FS, DiagnosticsEngine &Diags) { - auto ReaderOrErr = llvm::IndexedInstrProfReader::create(ProfileName); + auto ReaderOrErr = llvm::IndexedInstrProfReader::create(ProfileName, FS); if (auto E = ReaderOrErr.takeError()) { unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "Error in reading profile %0: %1"); @@ -1724,9 +1725,6 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, : codegenoptions::DebugTemplateNamesKind::Mangled); } - if (!Opts.ProfileInstrumentUsePath.empty()) - setPGOUseInstrumentor(Opts, Opts.ProfileInstrumentUsePath, Diags); - if (const Arg *A = Args.getLastArg(OPT_ftime_report, OPT_ftime_report_EQ)) { Opts.TimePasses = true; @@ -1962,8 +1960,8 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, Opts.OptimizationRemarkAnalysis.hasValidPattern(); bool UsingSampleProfile = !Opts.SampleProfileFile.empty(); - bool UsingProfile = UsingSampleProfile || - (Opts.getProfileUse() != CodeGenOptions::ProfileNone); + bool UsingProfile = + UsingSampleProfile || !Opts.ProfileInstrumentUsePath.empty(); if (Opts.DiagnosticsWithHotness && !UsingProfile && // An IR file will contain PGO as metadata @@ -4563,6 +4561,17 @@ bool CompilerInvocation::CreateFromArgsImpl( append_range(Res.getCodeGenOpts().CommandLineArgs, CommandLineArgs); } + // Set PGOOptions. Need to create a temporary VFS to read the profile + // to determine the PGO type. 
+ if (!Res.getCodeGenOpts().ProfileInstrumentUsePath.empty()) { + auto FS = + createVFSFromOverlayFiles(Res.getHeaderSearchOpts().VFSOverlayFiles, + Diags, llvm::vfs::getRealFileSystem()); + setPGOUseInstrumentor(Res.getCodeGenOpts(), + Res.getCodeGenOpts().ProfileInstrumentUsePath, *FS, + Diags); + } + FixupInvocation(Res, Diags, Args, DashX); return Diags.getNumErrors() == NumErrorsBefore; diff --git a/llvm/include/llvm/CodeGen/MIRSampleProfile.h b/llvm/include/llvm/CodeGen/MIRSampleProfile.h index f54c4b5891be..221e966e2b9e 100644 --- a/llvm/include/llvm/CodeGen/MIRSampleProfile.h +++ b/llvm/include/llvm/CodeGen/MIRSampleProfile.h @@ -14,6 +14,7 @@ #ifndef LLVM_CODEGEN_MIRSAMPLEPROFILE_H #define LLVM_CODEGEN_MIRSAMPLEPROFILE_H +#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/Discriminator.h" @@ -26,6 +27,10 @@ class MachineBlockFrequencyInfo; class MachineFunction; class Module; +namespace vfs { +class FileSystem; +} // namespace vfs + using namespace sampleprof; class MIRProfileLoader; @@ -41,7 +46,8 @@ public: /// FS bits will only use the '1' bits in the Mask. MIRProfileLoaderPass(std::string FileName = "", std::string RemappingFileName = "", - FSDiscriminatorPass P = FSDiscriminatorPass::Pass1); + FSDiscriminatorPass P = FSDiscriminatorPass::Pass1, + IntrusiveRefCntPtr FS = nullptr); /// getMachineFunction - Return the last machine function computed. const MachineFunction *getMachineFunction() const { return MF; } diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index b331c9a19fd1..66d213c2877b 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -31,6 +31,11 @@ class Pass; class TargetMachine; class raw_ostream; +template class IntrusiveRefCntPtr; +namespace vfs { +class FileSystem; +} // namespace vfs + } // End llvm namespace // List of target independent CodeGen pass IDs. 
@@ -551,9 +556,10 @@ namespace llvm { createMIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass P); /// Read Flow Sensitive Profile. - FunctionPass *createMIRProfileLoaderPass(std::string File, - std::string RemappingFile, - sampleprof::FSDiscriminatorPass P); + FunctionPass * + createMIRProfileLoaderPass(std::string File, std::string RemappingFile, + sampleprof::FSDiscriminatorPass P, + IntrusiveRefCntPtr FS); /// Creates MIR Debugify pass. \see MachineDebugify.cpp ModulePass *createDebugifyMachineModulePass(); diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index b7e6764d428b..9d9f256f1ced 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -32,6 +32,10 @@ class StringRef; class AAManager; class TargetMachine; class ModuleSummaryIndex; +template class IntrusiveRefCntPtr; +namespace vfs { +class FileSystem; +} // namespace vfs /// Tunable parameters for passes in the default pipelines. class PipelineTuningOptions { @@ -567,7 +571,8 @@ public: /// Add PGOInstrumenation passes for O0 only. void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, std::string ProfileFile, - std::string ProfileRemappingFile); + std::string ProfileRemappingFile, + IntrusiveRefCntPtr FS); /// Returns PIC. External libraries can use this to register pass /// instrumentation callbacks. 
@@ -607,7 +612,8 @@ private: void addPGOInstrPasses(ModulePassManager &MPM, OptimizationLevel Level, bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile, - ThinOrFullLTOPhase LTOPhase); + ThinOrFullLTOPhase LTOPhase, + IntrusiveRefCntPtr FS); void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel); // Extension Point callbacks diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index bdb772862468..37ddae87d574 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -47,6 +47,10 @@ namespace object { class BuildIDFetcher; } // namespace object +namespace vfs { +class FileSystem; +} // namespace vfs + namespace coverage { class CoverageMappingReader; @@ -616,7 +620,8 @@ public: /// Ignores non-instrumented object files unless all are not instrumented. static Expected> load(ArrayRef ObjectFilenames, StringRef ProfileFilename, - ArrayRef Arches = std::nullopt, StringRef CompilationDir = "", + vfs::FileSystem &FS, ArrayRef Arches = std::nullopt, + StringRef CompilationDir = "", const object::BuildIDFetcher *BIDFetcher = nullptr); /// The number of functions that couldn't have their profiles mapped. diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 9c216e57d005..c46fb8bf00b8 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -41,6 +41,10 @@ namespace llvm { class InstrProfReader; +namespace vfs { +class FileSystem; +} // namespace vfs + /// A file format agnostic iterator over profiling data. template @@ -190,7 +194,8 @@ public: /// Factory method to create an appropriately typed reader for the given /// instrprof file. 
static Expected> - create(const Twine &Path, const InstrProfCorrelator *Correlator = nullptr); + create(const Twine &Path, vfs::FileSystem &FS, + const InstrProfCorrelator *Correlator = nullptr); static Expected> create(std::unique_ptr Buffer, @@ -693,7 +698,8 @@ public: /// Factory method to create an indexed reader. static Expected> - create(const Twine &Path, const Twine &RemappingPath = ""); + create(const Twine &Path, vfs::FileSystem &FS, + const Twine &RemappingPath = ""); static Expected> create(std::unique_ptr Buffer, diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 57e8c8c74e4e..703ca81e332d 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -251,6 +251,10 @@ namespace llvm { class raw_ostream; class Twine; +namespace vfs { +class FileSystem; +} // namespace vfs + namespace sampleprof { class SampleProfileReader; @@ -270,8 +274,8 @@ public: /// Create a remapper from the given remapping file. The remapper will /// be used for profile read in by Reader. static ErrorOr> - create(const std::string Filename, SampleProfileReader &Reader, - LLVMContext &C); + create(const std::string Filename, vfs::FileSystem &FS, + SampleProfileReader &Reader, LLVMContext &C); /// Create a remapper from the given Buffer. The remapper will /// be used for profile read in by Reader. @@ -450,7 +454,7 @@ public: /// Create a remapper underlying if RemapFilename is not empty. /// Parameter P specifies the FSDiscriminatorPass. static ErrorOr> - create(const std::string Filename, LLVMContext &C, + create(const std::string Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P = FSDiscriminatorPass::Base, const std::string RemapFilename = ""); @@ -458,7 +462,7 @@ public: /// Create a remapper underlying if RemapFilename is not empty. /// Parameter P specifies the FSDiscriminatorPass. 
static ErrorOr> - create(std::unique_ptr &B, LLVMContext &C, + create(std::unique_ptr &B, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P = FSDiscriminatorPass::Base, const std::string RemapFilename = ""); diff --git a/llvm/include/llvm/Support/PGOOptions.h b/llvm/include/llvm/Support/PGOOptions.h index 2141e2159c0c..45a3b9a010f9 100644 --- a/llvm/include/llvm/Support/PGOOptions.h +++ b/llvm/include/llvm/Support/PGOOptions.h @@ -14,44 +14,29 @@ #ifndef LLVM_SUPPORT_PGOOPTIONS_H #define LLVM_SUPPORT_PGOOPTIONS_H +#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/Support/Error.h" namespace llvm { +namespace vfs { +class FileSystem; +} // namespace vfs + /// A struct capturing PGO tunables. struct PGOOptions { enum PGOAction { NoAction, IRInstr, IRUse, SampleUse }; enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse }; - PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "", - std::string ProfileRemappingFile = "", PGOAction Action = NoAction, - CSPGOAction CSAction = NoCSAction, + PGOOptions(std::string ProfileFile, std::string CSProfileGenFile, + std::string ProfileRemappingFile, + IntrusiveRefCntPtr FS, + PGOAction Action = NoAction, CSPGOAction CSAction = NoCSAction, bool DebugInfoForProfiling = false, - bool PseudoProbeForProfiling = false) - : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile), - ProfileRemappingFile(ProfileRemappingFile), Action(Action), - CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling || - (Action == SampleUse && - !PseudoProbeForProfiling)), - PseudoProbeForProfiling(PseudoProbeForProfiling) { - // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can - // callback with IRUse action without ProfileFile. - - // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse. - assert(this->CSAction == NoCSAction || - (this->Action != IRInstr && this->Action != SampleUse)); - - // For CSIRInstr, CSProfileGenFile also needs to be nonempty. 
- assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty()); - - // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share - // a profile. - assert(this->CSAction != CSIRUse || this->Action == IRUse); + bool PseudoProbeForProfiling = false); + PGOOptions(const PGOOptions &); + ~PGOOptions(); + PGOOptions &operator=(const PGOOptions &); - // If neither Action nor CSAction, DebugInfoForProfiling or - // PseudoProbeForProfiling needs to be true. - assert(this->Action != NoAction || this->CSAction != NoCSAction || - this->DebugInfoForProfiling || this->PseudoProbeForProfiling); - } std::string ProfileFile; std::string CSProfileGenFile; std::string ProfileRemappingFile; @@ -59,6 +44,7 @@ struct PGOOptions { CSPGOAction CSAction; bool DebugInfoForProfiling; bool PseudoProbeForProfiling; + IntrusiveRefCntPtr FS; }; } // namespace llvm diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfile.h b/llvm/include/llvm/Transforms/IPO/SampleProfile.h index d838c8b8a83e..2ef55949e236 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfile.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfile.h @@ -14,6 +14,7 @@ #ifndef LLVM_TRANSFORMS_IPO_SAMPLEPROFILE_H #define LLVM_TRANSFORMS_IPO_SAMPLEPROFILE_H +#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include @@ -22,14 +23,17 @@ namespace llvm { class Module; +namespace vfs { +class FileSystem; +} // namespace vfs + /// The sample profiler data loader pass. 
class SampleProfileLoaderPass : public PassInfoMixin { public: SampleProfileLoaderPass( std::string File = "", std::string RemappingFile = "", - ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None) - : ProfileFileName(File), ProfileRemappingFileName(RemappingFile), - LTOPhase(LTOPhase) {} + ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None, + IntrusiveRefCntPtr FS = nullptr); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); @@ -37,6 +41,7 @@ private: std::string ProfileFileName; std::string ProfileRemappingFileName; const ThinOrFullLTOPhase LTOPhase; + IntrusiveRefCntPtr FS; }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h index 875a17094d2e..952bc2f8edbe 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h @@ -16,6 +16,7 @@ #define LLVM_TRANSFORMS_INSTRUMENTATION_PGOINSTRUMENTATION_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/IR/PassManager.h" #include #include @@ -26,6 +27,10 @@ class Function; class Instruction; class Module; +namespace vfs { +class FileSystem; +} // namespace vfs + /// The instrumentation (profile-instr-gen) pass for IR based PGO. // We use this pass to create COMDAT profile variables for context // sensitive PGO (CSPGO). The reason to have a pass for this is CSPGO @@ -58,7 +63,8 @@ private: class PGOInstrumentationUse : public PassInfoMixin { public: PGOInstrumentationUse(std::string Filename = "", - std::string RemappingFilename = "", bool IsCS = false); + std::string RemappingFilename = "", bool IsCS = false, + IntrusiveRefCntPtr FS = nullptr); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); @@ -67,6 +73,7 @@ private: std::string ProfileRemappingFileName; // If this is a context sensitive instrumentation. 
bool IsCS; + IntrusiveRefCntPtr FS; }; /// The indirect function call promotion pass. diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h index 19aef8050d44..502867da3c50 100644 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -18,6 +18,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -46,6 +47,10 @@ using namespace sampleprof; using namespace sampleprofutil; using ProfileCount = Function::ProfileCount; +namespace vfs { +class FileSystem; +} // namespace vfs + #define DEBUG_TYPE "sample-profile-impl" namespace afdo_detail { @@ -79,8 +84,9 @@ extern cl::opt SampleProfileUseProfi; template class SampleProfileLoaderBaseImpl { public: - SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName) - : Filename(Name), RemappingFilename(RemapName) {} + SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName, + IntrusiveRefCntPtr FS) + : Filename(Name), RemappingFilename(RemapName), FS(std::move(FS)) {} void dump() { Reader->dump(); } using InstructionT = typename afdo_detail::IRTraits::InstructionT; @@ -215,6 +221,9 @@ protected: /// Name of the profile remapping file to load. std::string RemappingFilename; + /// VirtualFileSystem to load profile files from. + IntrusiveRefCntPtr FS; + /// Profile Summary Info computed from sample profile. 
ProfileSummaryInfo *PSI = nullptr; diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp index a8996a586909..81d86621b9df 100644 --- a/llvm/lib/CodeGen/MIRSampleProfile.cpp +++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp @@ -26,6 +26,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" @@ -72,10 +73,11 @@ INITIALIZE_PASS_END(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile", char &llvm::MIRProfileLoaderPassID = MIRProfileLoaderPass::ID; -FunctionPass *llvm::createMIRProfileLoaderPass(std::string File, - std::string RemappingFile, - FSDiscriminatorPass P) { - return new MIRProfileLoaderPass(File, RemappingFile, P); +FunctionPass * +llvm::createMIRProfileLoaderPass(std::string File, std::string RemappingFile, + FSDiscriminatorPass P, + IntrusiveRefCntPtr FS) { + return new MIRProfileLoaderPass(File, RemappingFile, P, std::move(FS)); } namespace llvm { @@ -136,9 +138,10 @@ public: assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); } - MIRProfileLoader(StringRef Name, StringRef RemapName) - : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)) { - } + MIRProfileLoader(StringRef Name, StringRef RemapName, + IntrusiveRefCntPtr FS) + : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName), + std::move(FS)) {} void setBranchProbs(MachineFunction &F); bool runOnFunction(MachineFunction &F); @@ -254,8 +257,8 @@ void MIRProfileLoader::setBranchProbs(MachineFunction &F) { bool MIRProfileLoader::doInitialization(Module &M) { auto &Ctx = M.getContext(); - auto ReaderOrErr = sampleprof::SampleProfileReader::create(Filename, Ctx, P, - RemappingFilename); + auto ReaderOrErr = sampleprof::SampleProfileReader::create( + 
Filename, Ctx, *FS, P, RemappingFilename); if (std::error_code EC = ReaderOrErr.getError()) { std::string Msg = "Could not open profile: " + EC.message(); Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); @@ -291,14 +294,16 @@ bool MIRProfileLoader::runOnFunction(MachineFunction &MF) { } // namespace llvm -MIRProfileLoaderPass::MIRProfileLoaderPass(std::string FileName, - std::string RemappingFileName, - FSDiscriminatorPass P) - : MachineFunctionPass(ID), ProfileFileName(FileName), P(P), - MIRSampleLoader( - std::make_unique(FileName, RemappingFileName)) { +MIRProfileLoaderPass::MIRProfileLoaderPass( + std::string FileName, std::string RemappingFileName, FSDiscriminatorPass P, + IntrusiveRefCntPtr FS) + : MachineFunctionPass(ID), ProfileFileName(FileName), P(P) { LowBit = getFSPassBitBegin(P); HighBit = getFSPassBitEnd(P); + + auto VFS = FS ? std::move(FS) : vfs::getRealFileSystem(); + MIRSampleLoader = std::make_unique( + FileName, RemappingFileName, std::move(VFS)); assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); } diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 3127328c363e..a047e66dcc01 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/Threading.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Scalar.h" @@ -1149,9 +1150,9 @@ void TargetPassConfig::addMachinePasses() { sampleprof::FSDiscriminatorPass::Pass1)); const std::string ProfileFile = getFSProfileFile(TM); if (!ProfileFile.empty() && !DisableRAFSProfileLoader) - addPass( - createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM), - sampleprof::FSDiscriminatorPass::Pass1)); + addPass(createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM), + 
sampleprof::FSDiscriminatorPass::Pass1, + nullptr)); } // Run register allocation and passes that are tightly coupled with it, @@ -1525,9 +1526,9 @@ void TargetPassConfig::addBlockPlacement() { sampleprof::FSDiscriminatorPass::Pass2)); const std::string ProfileFile = getFSProfileFile(TM); if (!ProfileFile.empty() && !DisableLayoutFSProfileLoader) - addPass( - createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM), - sampleprof::FSDiscriminatorPass::Pass2)); + addPass(createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM), + sampleprof::FSDiscriminatorPass::Pass2, + nullptr)); } if (addPass(&MachineBlockPlacementID)) { // Run a separate pass to collect block placement statistics. diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 1c2ca253af35..4c41a382276a 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -38,6 +38,7 @@ #include "llvm/Support/Program.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO/WholeProgramDevirt.h" @@ -232,21 +233,22 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, unsigned OptLevel, bool IsThinLTO, ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary) { + auto FS = vfs::getRealFileSystem(); std::optional PGOOpt; if (!Conf.SampleProfile.empty()) - PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping, + PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping, FS, PGOOptions::SampleUse, PGOOptions::NoCSAction, true); else if (Conf.RunCSIRInstr) { - PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping, + PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping, FS, PGOOptions::IRUse, PGOOptions::CSIRInstr, Conf.AddFSDiscriminator); } else if (!Conf.CSIRProfile.empty()) { - PGOOpt = 
PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping, + PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping, FS, PGOOptions::IRUse, PGOOptions::CSIRUse, Conf.AddFSDiscriminator); NoPGOWarnMismatch = !Conf.PGOWarnMismatch; } else if (Conf.AddFSDiscriminator) { - PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction, + PGOOpt = PGOOptions("", "", "", nullptr, PGOOptions::NoAction, PGOOptions::NoCSAction, true); } TM->setPGOOption(PGOOpt); diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 0d074951cffc..d36912dc8f60 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/PGOOptions.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/Coroutines/CoroCleanup.h" @@ -692,7 +693,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, OptimizationLevel Level, bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile, - ThinOrFullLTOPhase LTOPhase) { + ThinOrFullLTOPhase LTOPhase, + IntrusiveRefCntPtr FS) { assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!"); if (!IsCS && !DisablePreInliner) { InlineParams IP; @@ -730,7 +732,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, if (!RunProfileGen) { assert(!ProfileFile.empty() && "Profile use expecting a profile file!"); - MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); + MPM.addPass( + PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS)); // Cache ProfileSummaryAnalysis once to avoid the potential need to insert // RequireAnalysisPass for PSI before subsequent non-module passes. 
MPM.addPass(RequireAnalysisPass()); @@ -760,13 +763,14 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, MPM.addPass(InstrProfiling(Options, IsCS)); } -void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM, - bool RunProfileGen, bool IsCS, - std::string ProfileFile, - std::string ProfileRemappingFile) { +void PassBuilder::addPGOInstrPassesForO0( + ModulePassManager &MPM, bool RunProfileGen, bool IsCS, + std::string ProfileFile, std::string ProfileRemappingFile, + IntrusiveRefCntPtr FS) { if (!RunProfileGen) { assert(!ProfileFile.empty() && "Profile use expecting a profile file!"); - MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); + MPM.addPass( + PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS)); // Cache ProfileSummaryAnalysis once to avoid the potential need to insert // RequireAnalysisPass for PSI before subsequent non-module passes. MPM.addPass(RequireAnalysisPass()); @@ -1052,7 +1056,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, addPGOInstrPasses(MPM, Level, /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr, /* IsCS */ false, PGOOpt->ProfileFile, - PGOOpt->ProfileRemappingFile, Phase); + PGOOpt->ProfileRemappingFile, Phase, PGOOpt->FS); MPM.addPass(PGOIndirectCallPromotion(false, false)); } if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && @@ -1266,11 +1270,11 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, if (PGOOpt->CSAction == PGOOptions::CSIRInstr) addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ true, PGOOpt->CSProfileGenFile, - PGOOpt->ProfileRemappingFile, LTOPhase); + PGOOpt->ProfileRemappingFile, LTOPhase, PGOOpt->FS); else if (PGOOpt->CSAction == PGOOptions::CSIRUse) addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false, /* IsCS */ true, PGOOpt->ProfileFile, - PGOOpt->ProfileRemappingFile, LTOPhase); + PGOOpt->ProfileRemappingFile, LTOPhase, PGOOpt->FS); } // Re-compute GlobalsAA here 
prior to function passes. This is particularly @@ -1742,12 +1746,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ true, PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile, - ThinOrFullLTOPhase::FullLTOPostLink); + ThinOrFullLTOPhase::FullLTOPostLink, PGOOpt->FS); else if (PGOOpt->CSAction == PGOOptions::CSIRUse) addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false, /* IsCS */ true, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, - ThinOrFullLTOPhase::FullLTOPostLink); + ThinOrFullLTOPhase::FullLTOPostLink, PGOOpt->FS); } // Break up allocas @@ -1878,7 +1882,8 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, addPGOInstrPassesForO0( MPM, /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr), - /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile); + /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, + PGOOpt->FS); for (auto &C : PipelineStartEPCallbacks) C(MPM, Level); diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index ce71eebd4fd3..360d30d5d0a1 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -383,10 +384,10 @@ Error CoverageMapping::loadFromFile( Expected> CoverageMapping::load(ArrayRef ObjectFilenames, - StringRef ProfileFilename, ArrayRef Arches, - StringRef CompilationDir, + StringRef ProfileFilename, vfs::FileSystem &FS, + ArrayRef Arches, StringRef CompilationDir, const object::BuildIDFetcher *BIDFetcher) { - auto ProfileReaderOrErr = IndexedInstrProfReader::create(ProfileFilename); + auto ProfileReaderOrErr = 
IndexedInstrProfReader::create(ProfileFilename, FS); if (Error E = ProfileReaderOrErr.takeError()) return createFileError(ProfileFilename, std::move(E)); auto ProfileReader = std::move(ProfileReaderOrErr.get()); diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index aee104310a1d..342d405af9db 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/VirtualFileSystem.h" #include #include #include @@ -1224,7 +1225,10 @@ Error OverlapStats::accumulateCounts(const std::string &BaseFilename, bool IsCS) { auto getProfileSum = [IsCS](const std::string &Filename, CountSumOrPercent &Sum) -> Error { - auto ReaderOrErr = InstrProfReader::create(Filename); + // This function is only used from llvm-profdata that doesn't use any kind + // of VFS. Just create a default RealFileSystem to read profiles. + auto FS = vfs::getRealFileSystem(); + auto ReaderOrErr = InstrProfReader::create(Filename, *FS); if (Error E = ReaderOrErr.takeError()) { return E; } diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index d0714c9b4665..498ec6910440 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/SymbolRemappingReader.h" +#include "llvm/Support/VirtualFileSystem.h" #include #include #include @@ -63,9 +64,9 @@ static InstrProfKind getProfileKindFromVersion(uint64_t Version) { } static Expected> -setupMemoryBuffer(const Twine &Path) { - ErrorOr> BufferOrErr = - MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true); +setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { + auto BufferOrErr = Filename.str() == "-" ? 
MemoryBuffer::getSTDIN() + : FS.getBufferForFile(Filename); if (std::error_code EC = BufferOrErr.getError()) return errorCodeToError(EC); return std::move(BufferOrErr.get()); @@ -161,10 +162,10 @@ static Error printBinaryIdsInternal(raw_ostream &OS, } Expected> -InstrProfReader::create(const Twine &Path, +InstrProfReader::create(const Twine &Path, vfs::FileSystem &FS, const InstrProfCorrelator *Correlator) { // Set up the buffer to read. - auto BufferOrError = setupMemoryBuffer(Path); + auto BufferOrError = setupMemoryBuffer(Path, FS); if (Error E = BufferOrError.takeError()) return std::move(E); return InstrProfReader::create(std::move(BufferOrError.get()), Correlator); @@ -201,9 +202,10 @@ InstrProfReader::create(std::unique_ptr Buffer, } Expected> -IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) { +IndexedInstrProfReader::create(const Twine &Path, vfs::FileSystem &FS, + const Twine &RemappingPath) { // Set up the buffer to read. - auto BufferOrError = setupMemoryBuffer(Path); + auto BufferOrError = setupMemoryBuffer(Path, FS); if (Error E = BufferOrError.takeError()) return std::move(E); @@ -211,7 +213,7 @@ IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) { std::unique_ptr RemappingBuffer; std::string RemappingPathStr = RemappingPath.str(); if (!RemappingPathStr.empty()) { - auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr); + auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr, FS); if (Error E = RemappingBufferOrError.takeError()) return std::move(E); RemappingBuffer = std::move(RemappingBufferOrError.get()); diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index d3753d1e8a99..7fa3d5c535c2 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -35,6 +35,7 @@ #include "llvm/Support/LineIterator.h" #include "llvm/Support/MD5.h" #include "llvm/Support/MemoryBuffer.h" +#include 
"llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -1831,8 +1832,9 @@ SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) { /// /// \returns an error code indicating the status of the buffer. static ErrorOr> -setupMemoryBuffer(const Twine &Filename) { - auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true); +setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { + auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN() + : FS.getBufferForFile(Filename); if (std::error_code EC = BufferOrErr.getError()) return EC; auto Buffer = std::move(BufferOrErr.get()); @@ -1853,12 +1855,12 @@ setupMemoryBuffer(const Twine &Filename) { /// \returns an error code indicating the status of the created reader. ErrorOr> SampleProfileReader::create(const std::string Filename, LLVMContext &C, - FSDiscriminatorPass P, + vfs::FileSystem &FS, FSDiscriminatorPass P, const std::string RemapFilename) { - auto BufferOrError = setupMemoryBuffer(Filename); + auto BufferOrError = setupMemoryBuffer(Filename, FS); if (std::error_code EC = BufferOrError.getError()) return EC; - return create(BufferOrError.get(), C, P, RemapFilename); + return create(BufferOrError.get(), C, FS, P, RemapFilename); } /// Create a sample profile remapper from the given input, to remap the @@ -1873,9 +1875,10 @@ SampleProfileReader::create(const std::string Filename, LLVMContext &C, /// \returns an error code indicating the status of the created reader. 
ErrorOr> SampleProfileReaderItaniumRemapper::create(const std::string Filename, + vfs::FileSystem &FS, SampleProfileReader &Reader, LLVMContext &C) { - auto BufferOrError = setupMemoryBuffer(Filename); + auto BufferOrError = setupMemoryBuffer(Filename, FS); if (std::error_code EC = BufferOrError.getError()) return EC; return create(BufferOrError.get(), Reader, C); @@ -1923,7 +1926,7 @@ SampleProfileReaderItaniumRemapper::create(std::unique_ptr &B, /// \returns an error code indicating the status of the created reader. ErrorOr> SampleProfileReader::create(std::unique_ptr &B, LLVMContext &C, - FSDiscriminatorPass P, + vfs::FileSystem &FS, FSDiscriminatorPass P, const std::string RemapFilename) { std::unique_ptr Reader; if (SampleProfileReaderRawBinary::hasFormat(*B)) @@ -1940,8 +1943,8 @@ SampleProfileReader::create(std::unique_ptr &B, LLVMContext &C, return sampleprof_error::unrecognized_format; if (!RemapFilename.empty()) { - auto ReaderOrErr = - SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C); + auto ReaderOrErr = SampleProfileReaderItaniumRemapper::create( + RemapFilename, FS, *Reader, C); if (std::error_code EC = ReaderOrErr.getError()) { std::string Msg = "Could not create remapper: " + EC.message(); C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg)); diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 4cbc3b79f3bb..fdb2a5dcaa1a 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -195,6 +195,7 @@ add_llvm_component_library(LLVMSupport NativeFormatting.cpp OptimizedStructLayout.cpp Optional.cpp + PGOOptions.cpp Parallel.cpp PluginLoader.cpp PrettyStackTrace.cpp diff --git a/llvm/lib/Support/PGOOptions.cpp b/llvm/lib/Support/PGOOptions.cpp new file mode 100644 index 000000000000..d11528ca6dbc --- /dev/null +++ b/llvm/lib/Support/PGOOptions.cpp @@ -0,0 +1,52 @@ +//===------ PGOOptions.cpp -- PGO option tunables --------------*- C++ -*--===// +// +// Part of the LLVM 
Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/PGOOptions.h" +#include "llvm/Support/VirtualFileSystem.h" + +using namespace llvm; + +PGOOptions::PGOOptions(std::string ProfileFile, std::string CSProfileGenFile, + std::string ProfileRemappingFile, + IntrusiveRefCntPtr FS, PGOAction Action, + CSPGOAction CSAction, bool DebugInfoForProfiling, + bool PseudoProbeForProfiling) + : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile), + ProfileRemappingFile(ProfileRemappingFile), Action(Action), + CSAction(CSAction), + DebugInfoForProfiling(DebugInfoForProfiling || + (Action == SampleUse && !PseudoProbeForProfiling)), + PseudoProbeForProfiling(PseudoProbeForProfiling), FS(std::move(FS)) { + // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can + // callback with IRUse action without ProfileFile. + + // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse. + assert(this->CSAction == NoCSAction || + (this->Action != IRInstr && this->Action != SampleUse)); + + // For CSIRInstr, CSProfileGenFile also needs to be nonempty. + assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty()); + + // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share + // a profile. + assert(this->CSAction != CSIRUse || this->Action == IRUse); + + // If neither Action nor CSAction, DebugInfoForProfiling or + // PseudoProbeForProfiling needs to be true. + assert(this->Action != NoAction || this->CSAction != NoCSAction || + this->DebugInfoForProfiling || this->PseudoProbeForProfiling); + + // If we need to use the profile, the VFS cannot be nullptr. 
+ assert(this->FS || !(this->Action == IRUse || this->CSAction == CSIRUse)); +} + +PGOOptions::PGOOptions(const PGOOptions &) = default; + +PGOOptions &PGOOptions::operator=(const PGOOptions &O) = default; + +PGOOptions::~PGOOptions() = default; diff --git a/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/llvm/lib/Target/X86/X86InsertPrefetch.cpp index 08dc514a6476..29ae05bf0c94 100644 --- a/llvm/lib/Target/X86/X86InsertPrefetch.cpp +++ b/llvm/lib/Target/X86/X86InsertPrefetch.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Transforms/IPO/SampleProfile.h" using namespace llvm; using namespace sampleprof; @@ -159,8 +160,10 @@ bool X86InsertPrefetch::doInitialization(Module &M) { return false; LLVMContext &Ctx = M.getContext(); + // TODO: Propagate virtual file system into LLVM targets. + auto FS = vfs::getRealFileSystem(); ErrorOr> ReaderOrErr = - SampleProfileReader::create(Filename, Ctx); + SampleProfileReader::create(Filename, Ctx, *FS); if (std::error_code EC = ReaderOrErr.getError()) { std::string Msg = "Could not open profile: " + EC.message(); Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg, diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 2c99290fe858..7a08536f9be5 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -67,6 +67,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/ProfiledCallGraph.h" @@ -506,10 +507,12 @@ class SampleProfileLoader final public: SampleProfileLoader( StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase, + IntrusiveRefCntPtr FS, std::function 
GetAssumptionCache, std::function GetTargetTransformInfo, std::function GetTLI) - : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)), + : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName), + std::move(FS)), GetAC(std::move(GetAssumptionCache)), GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)), LTOPhase(LTOPhase), @@ -2004,7 +2007,7 @@ bool SampleProfileLoader::doInitialization(Module &M, auto &Ctx = M.getContext(); auto ReaderOrErr = SampleProfileReader::create( - Filename, Ctx, FSDiscriminatorPass::Base, RemappingFilename); + Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename); if (std::error_code EC = ReaderOrErr.getError()) { std::string Msg = "Could not open profile: " + EC.message(); Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); @@ -2589,6 +2592,11 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) return emitAnnotations(F); return false; } +SampleProfileLoaderPass::SampleProfileLoaderPass( + std::string File, std::string RemappingFile, ThinOrFullLTOPhase LTOPhase, + IntrusiveRefCntPtr FS) + : ProfileFileName(File), ProfileRemappingFileName(RemappingFile), + LTOPhase(LTOPhase), FS(std::move(FS)) {} PreservedAnalyses SampleProfileLoaderPass::run(Module &M, ModuleAnalysisManager &AM) { @@ -2605,11 +2613,14 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M, return FAM.getResult(F); }; + if (!FS) + FS = vfs::getRealFileSystem(); + SampleProfileLoader SampleLoader( ProfileFileName.empty() ? SampleProfileFile : ProfileFileName, ProfileRemappingFileName.empty() ? 
SampleProfileRemappingFile : ProfileRemappingFileName, - LTOPhase, GetAssumptionCache, GetTTI, GetTLI); + LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI); if (!SampleLoader.doInitialization(M, &FAM)) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 4d4eb6f8ce80..78440758cf15 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -110,6 +110,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/HashBuilder.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -2059,6 +2060,7 @@ static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, static bool annotateAllFunctions( Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, + vfs::FileSystem &FS, function_ref LookupTLI, function_ref LookupBPI, function_ref LookupBFI, @@ -2066,8 +2068,8 @@ static bool annotateAllFunctions( LLVM_DEBUG(dbgs() << "Read in profile counters: "); auto &Ctx = M.getContext(); // Read the counter array from file. 
- auto ReaderOrErr = - IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); + auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS, + ProfileRemappingFileName); if (Error E = ReaderOrErr.takeError()) { handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { Ctx.diagnose( @@ -2249,15 +2251,18 @@ static bool annotateAllFunctions( return true; } -PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, - std::string RemappingFilename, - bool IsCS) +PGOInstrumentationUse::PGOInstrumentationUse( + std::string Filename, std::string RemappingFilename, bool IsCS, + IntrusiveRefCntPtr VFS) : ProfileFileName(std::move(Filename)), - ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { + ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS), + FS(std::move(VFS)) { if (!PGOTestProfileFile.empty()) ProfileFileName = PGOTestProfileFile; if (!PGOTestProfileRemappingFile.empty()) ProfileRemappingFileName = PGOTestProfileRemappingFile; + if (!FS) + FS = vfs::getRealFileSystem(); } PreservedAnalyses PGOInstrumentationUse::run(Module &M, @@ -2276,7 +2281,7 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M, auto *PSI = &AM.getResult(M); - if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, + if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS, LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) return PreservedAnalyses::all(); diff --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp index 7b71d5ad4554..733a58158afc 100644 --- a/llvm/tools/llvm-cov/CodeCoverage.cpp +++ b/llvm/tools/llvm-cov/CodeCoverage.cpp @@ -439,8 +439,9 @@ std::unique_ptr CodeCoverageTool::load() { if (modifiedTimeGT(ObjectFilename, PGOFilename)) warning("profile data may be out of date - object is newer", ObjectFilename); + auto FS = vfs::getRealFileSystem(); auto CoverageOrErr = - CoverageMapping::load(ObjectFilenames, PGOFilename, CoverageArches, + 
CoverageMapping::load(ObjectFilenames, PGOFilename, *FS, CoverageArches, ViewOpts.CompilationDirectory, BIDFetcher.get()); if (Error E = CoverageOrErr.takeError()) { error("Failed to load coverage: " + toString(std::move(E))); diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index a976f0604c1f..afe2769643a8 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -35,6 +35,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Threading.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include @@ -226,7 +227,8 @@ static void overlapInput(const std::string &BaseFilename, OverlapStats &Overlap, const OverlapFuncFilters &FuncFilter, raw_fd_ostream &OS, bool IsCS) { - auto ReaderOrErr = InstrProfReader::create(TestFilename); + auto FS = vfs::getRealFileSystem(); + auto ReaderOrErr = InstrProfReader::create(TestFilename, *FS); if (Error E = ReaderOrErr.takeError()) { // Skip the empty profiles by returning sliently. instrprof_error IPE = InstrProfError::take(std::move(E)); @@ -298,7 +300,8 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, return; } - auto ReaderOrErr = InstrProfReader::create(Input.Filename, Correlator); + auto FS = vfs::getRealFileSystem(); + auto ReaderOrErr = InstrProfReader::create(Input.Filename, *FS, Correlator); if (Error E = ReaderOrErr.takeError()) { // Skip the empty profiles by returning sliently. instrprof_error IPE = InstrProfError::take(std::move(E)); @@ -838,8 +841,9 @@ static void supplementInstrProfile( // Read sample profile. 
LLVMContext Context; + auto FS = vfs::getRealFileSystem(); auto ReaderOrErr = sampleprof::SampleProfileReader::create( - SampleFilename.str(), Context, FSDiscriminatorPassOption); + SampleFilename.str(), Context, *FS, FSDiscriminatorPassOption); if (std::error_code EC = ReaderOrErr.getError()) exitWithErrorCode(EC, SampleFilename); auto Reader = std::move(ReaderOrErr.get()); @@ -976,7 +980,8 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, std::optional ProfileIsProbeBased; std::optional ProfileIsCS; for (const auto &Input : Inputs) { - auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, + auto FS = vfs::getRealFileSystem(); + auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, *FS, FSDiscriminatorPassOption); if (std::error_code EC = ReaderOrErr.getError()) { warnOrExitGivenError(FailMode, EC, Input.Filename); @@ -2208,12 +2213,13 @@ std::error_code SampleOverlapAggregator::loadProfiles() { using namespace sampleprof; LLVMContext Context; - auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context, + auto FS = vfs::getRealFileSystem(); + auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context, *FS, FSDiscriminatorPassOption); if (std::error_code EC = BaseReaderOrErr.getError()) exitWithErrorCode(EC, BaseFilename); - auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context, + auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context, *FS, FSDiscriminatorPassOption); if (std::error_code EC = TestReaderOrErr.getError()) exitWithErrorCode(EC, TestFilename); @@ -2391,7 +2397,8 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts, exitWithError("JSON output is not supported for instr profiles"); if (SFormat == ShowFormat::Yaml) exitWithError("YAML output is not supported for instr profiles"); - auto ReaderOrErr = InstrProfReader::create(Filename); + auto FS = vfs::getRealFileSystem(); + auto ReaderOrErr = 
InstrProfReader::create(Filename, *FS); std::vector Cutoffs = std::move(DetailedSummaryCutoffs); if (ShowDetailedSummary && Cutoffs.empty()) { Cutoffs = ProfileSummaryBuilder::DefaultCutoffs; @@ -2761,8 +2768,9 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts, exitWithError("YAML output is not supported for sample profiles"); using namespace sampleprof; LLVMContext Context; - auto ReaderOrErr = - SampleProfileReader::create(Filename, Context, FSDiscriminatorPassOption); + auto FS = vfs::getRealFileSystem(); + auto ReaderOrErr = SampleProfileReader::create(Filename, Context, *FS, + FSDiscriminatorPassOption); if (std::error_code EC = ReaderOrErr.getError()) exitWithErrorCode(EC, Filename); diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp index 596882c1b93f..3b974e25103a 100644 --- a/llvm/tools/llvm-profgen/llvm-profgen.cpp +++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Support/VirtualFileSystem.h" static cl::OptionCategory ProfGenCategory("ProfGen Options"); @@ -157,7 +158,9 @@ int main(int argc, const char *argv[]) { if (SampleProfFilename.getNumOccurrences()) { LLVMContext Context; - auto ReaderOrErr = SampleProfileReader::create(SampleProfFilename, Context); + auto FS = vfs::getRealFileSystem(); + auto ReaderOrErr = + SampleProfileReader::create(SampleProfFilename, Context, *FS); std::unique_ptr Reader = std::move(ReaderOrErr.get()); Reader->read(); diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index a8db0c62898e..bb9711e7aa65 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -31,6 +31,7 @@ #include "llvm/Passes/StandardInstrumentations.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/VirtualFileSystem.h" #include 
"llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" @@ -333,22 +334,25 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, bool EnableDebugify, bool VerifyDIPreserve) { bool VerifyEachPass = VK == VK_VerifyEachPass; + auto FS = vfs::getRealFileSystem(); std::optional P; switch (PGOKindFlag) { case InstrGen: - P = PGOOptions(ProfileFile, "", "", PGOOptions::IRInstr); + P = PGOOptions(ProfileFile, "", "", FS, PGOOptions::IRInstr); break; case InstrUse: - P = PGOOptions(ProfileFile, "", ProfileRemappingFile, PGOOptions::IRUse); + P = PGOOptions(ProfileFile, "", ProfileRemappingFile, FS, + PGOOptions::IRUse); break; case SampleUse: - P = PGOOptions(ProfileFile, "", ProfileRemappingFile, + P = PGOOptions(ProfileFile, "", ProfileRemappingFile, FS, PGOOptions::SampleUse); break; case NoPGO: if (DebugInfoForProfiling || PseudoProbeForProfiling) - P = PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction, - DebugInfoForProfiling, PseudoProbeForProfiling); + P = PGOOptions("", "", "", nullptr, PGOOptions::NoAction, + PGOOptions::NoCSAction, DebugInfoForProfiling, + PseudoProbeForProfiling); else P = std::nullopt; } @@ -367,7 +371,7 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, P->CSAction = PGOOptions::CSIRInstr; P->CSProfileGenFile = CSProfileGenFile; } else - P = PGOOptions("", CSProfileGenFile, ProfileRemappingFile, + P = PGOOptions("", CSProfileGenFile, ProfileRemappingFile, FS, PGOOptions::NoAction, PGOOptions::CSIRInstr); } else /* CSPGOKindFlag == CSInstrUse */ { if (!P) { diff --git a/llvm/unittests/ProfileData/SampleProfTest.cpp b/llvm/unittests/ProfileData/SampleProfTest.cpp index 92925c7e9e68..bd34b05336e1 100644 --- a/llvm/unittests/ProfileData/SampleProfTest.cpp +++ b/llvm/unittests/ProfileData/SampleProfTest.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include 
"llvm/Support/MemoryBuffer.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Testing/Support/SupportHelpers.h" #include "gtest/gtest.h" @@ -57,8 +58,9 @@ struct SampleProfTest : ::testing::Test { void readProfile(const Module &M, StringRef Profile, StringRef RemapFile = "") { + auto FS = vfs::getRealFileSystem(); auto ReaderOrErr = SampleProfileReader::create( - std::string(Profile), Context, FSDiscriminatorPass::Base, + std::string(Profile), Context, *FS, FSDiscriminatorPass::Base, std::string(RemapFile)); ASSERT_TRUE(NoError(ReaderOrErr.getError())); Reader = std::move(ReaderOrErr.get()); -- Gitee From 4016ad64be85a93de03592e09216aa8437bd3b6e Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Fri, 23 Jun 2023 10:16:02 -0700 Subject: [PATCH 13/13] [llvm-profgen] Remove target triple check to allow for more targets Llvm-profgen internally uses the llvm libraries and the MCDesc interface to do disassembling and symblization and it never checks against target-specific instruction operators. This makes it quite transparent to targets and a first attempt for an aarch64 binary just works. Therefore I'm removing the unnecessary triple check to unblock for new targets. Reviewed By: wenlei Differential Revision: https://reviews.llvm.org/D153449 --- llvm/tools/llvm-profgen/ProfiledBinary.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index f18c62d0dcd0..5d9a82ccf3e6 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -215,9 +215,7 @@ void ProfiledBinary::load() { exitWithError("not a valid Elf image", Path); TheTriple = Obj->makeTriple(); - // Current only support X86 - if (!TheTriple.isX86()) - exitWithError("unsupported target", TheTriple.getTriple()); + LLVM_DEBUG(dbgs() << "Loading " << Path << "\n"); // Find the preferred load address for text sections. 
-- Gitee