From 77ef6d595d548e0c636a5362e81ed7cdaa2136fc Mon Sep 17 00:00:00 2001 From: rfwang07 Date: Tue, 19 Nov 2024 09:35:25 +0800 Subject: [PATCH] support D-FOT addrs data parsing for optimized binary --- ...rs-data-parsing-for-optimized-binary.patch | 226 ++++++++++++++++++ llvm-bolt.spec | 9 +- 2 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch diff --git a/0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch b/0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch new file mode 100644 index 0000000..8a3c9e0 --- /dev/null +++ b/0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch @@ -0,0 +1,226 @@ +From 525a2d44443547c0349198df18286f594d62d557 Mon Sep 17 00:00:00 2001 +From: rfwang07 +Date: Tue, 19 Nov 2024 09:48:40 +0800 +Subject: [PATCH] support D-FOT addrs data parsing for optimized binary + +--- + bolt/include/bolt/Profile/DataAggregator.h | 31 ++++++++ + bolt/lib/Profile/DataAggregator.cpp | 86 +++++++++++++++++++++- + 2 files changed, 113 insertions(+), 4 deletions(-) + +diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h +index cc237a6..d352f1b 100644 +--- a/bolt/include/bolt/Profile/DataAggregator.h ++++ b/bolt/include/bolt/Profile/DataAggregator.h +@@ -102,6 +102,12 @@ private: + Type EntryType; + }; + ++ /// Used for parsing specific libkperf input files. ++ struct LibkperfDataEntry { ++ uint64_t Addr; ++ uint64_t Count; ++ }; ++ + struct Trace { + uint64_t From; + uint64_t To; +@@ -300,6 +306,9 @@ private: + /// Parse pre-aggregated LBR samples created by an external tool + ErrorOr parseAggregatedLBREntry(); + ++ /// Parse libkperf samples created by D-FOT ++ ErrorOr parseLibkperfDataEntry(); ++ + /// Parse either buildid:offset or just offset, representing a location in the + /// binary. Used exclusevely for pre-aggregated LBR samples. 
+ ErrorOr parseLocationOrOffset(); +@@ -417,10 +426,32 @@ private: + /// B 4b196f 4b19e0 2 0 + void parsePreAggregated(); + ++ /// Coordinate reading and parsing of libkperf file ++ /// The regular perf2bolt aggregation job is to read perf output directly. ++ /// But in the oeaware framework, sampling is done by libkperf. ++ /// For data collected by sampling the BOLT-optimized binary, ++ /// oeaware can export addrs and counts. ++ /// In perf2bolt, with the help of the BAT section, ++ /// this data is converted to a profile that is usable for the original binary. ++ /// ++ /// File format syntax: ++ /// - first line: <event name> ++ /// - the other lines: <hex addr> <count> ++ /// ++ /// Example: ++ /// cycles ++ /// 40f544 1 ++ /// 40f750 2 ++ /// 40f810 53 ++ void parseLibkperfFile(); ++ + /// Parse the full output of pre-aggregated LBR samples generated by + /// an external tool. + std::error_code parsePreAggregatedLBRSamples(); + ++ /// Parse the libkperf samples ++ std::error_code parseLibkperfSamples(); ++ + /// Process parsed pre-aggregated data. 
+ void processPreAggregated(); + +diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp +index 24dbe34..509e7c9 100644 +--- a/bolt/lib/Profile/DataAggregator.cpp ++++ b/bolt/lib/Profile/DataAggregator.cpp +@@ -85,6 +85,11 @@ cl::opt ReadPreAggregated( + "pa", cl::desc("skip perf and read data from a pre-aggregated file format"), + cl::cat(AggregatorCategory)); + ++cl::opt ReadLibkperfFile( ++ "libkperf", cl::desc("skip perf and read data from a libkperf file format, " ++ "only for continuous optimizing with BAT"), ++ cl::cat(AggregatorCategory)); ++ + static cl::opt + TimeAggregator("time-aggr", + cl::desc("time BOLT aggregator"), +@@ -157,8 +162,8 @@ void DataAggregator::findPerfExecutable() { + void DataAggregator::start() { + outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n"; + +- // Don't launch perf for pre-aggregated files +- if (opts::ReadPreAggregated) ++ // Don't launch perf for pre-aggregated files and libkperf files ++ if (opts::ReadPreAggregated || opts::ReadLibkperfFile) + return; + + findPerfExecutable(); +@@ -193,7 +198,7 @@ void DataAggregator::start() { + } + + void DataAggregator::abort() { +- if (opts::ReadPreAggregated) ++ if (opts::ReadPreAggregated || opts::ReadLibkperfFile) + return; + + std::string Error; +@@ -313,6 +318,8 @@ void DataAggregator::processFileBuildID(StringRef FileBuildID) { + bool DataAggregator::checkPerfDataMagic(StringRef FileName) { + if (opts::ReadPreAggregated) + return true; ++ if (opts::ReadLibkperfFile) ++ return true; + + Expected FD = sys::fs::openNativeFileForRead(FileName); + if (!FD) { +@@ -359,6 +366,27 @@ void DataAggregator::parsePreAggregated() { + } + } + ++void DataAggregator::parseLibkperfFile() { ++ std::string Error; ++ ++ ErrorOr> MB = ++ MemoryBuffer::getFileOrSTDIN(Filename); ++ if (std::error_code EC = MB.getError()) { ++ errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": " ++ << EC.message() << "\n"; ++ exit(1); ++ } ++ ++ 
FileBuf = std::move(*MB); ++ ParsingBuf = FileBuf->getBuffer(); ++ Col = 0; ++ Line = 0; ++ if (parseLibkperfSamples()) { ++ errs() << "PERF2BOLT: failed to parse libkperf samples\n"; ++ exit(1); ++ } ++} ++ + std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) { + outs() << "PERF2BOLT: writing data for autofdo tools...\n"; + NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName, +@@ -502,6 +530,11 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { + return Error::success(); + } + ++ if (opts::ReadLibkperfFile) { ++ parseLibkperfFile(); ++ return Error::success(); ++ } ++ + if (std::optional FileBuildID = BC.getFileBuildID()) { + outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; + processFileBuildID(*FileBuildID); +@@ -608,7 +641,7 @@ bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) { + void DataAggregator::processProfile(BinaryContext &BC) { + if (opts::ReadPreAggregated) + processPreAggregated(); +- else if (opts::BasicAggregation) ++ else if (opts::BasicAggregation || opts::ReadLibkperfFile) + processBasicEvents(); + else + processBranchEvents(); +@@ -1206,6 +1239,28 @@ ErrorOr DataAggregator::parseLocationOrOffset() { + return Location(true, BuildID.get(), Offset.get()); + } + ++ErrorOr ++DataAggregator::parseLibkperfDataEntry() { ++ // ++ while (checkAndConsumeFS()) { ++ } ++ ErrorOr Addr = parseHexField(FieldSeparator); ++ if (std::error_code EC = Addr.getError()) ++ return EC; ++ while (checkAndConsumeFS()) { ++ } ++ ErrorOr Count = parseNumberField(FieldSeparator, true); ++ if (std::error_code EC = Count.getError()) ++ return EC; ++ ++ if (!checkAndConsumeNewLine()) { ++ reportError("expected end of line"); ++ return make_error_code(llvm::errc::io_error); ++ } ++ ++ return LibkperfDataEntry{Addr.get(), Count.get()}; ++} ++ + ErrorOr + DataAggregator::parseAggregatedLBREntry() { + while (checkAndConsumeFS()) { +@@ -1712,6 +1767,29 @@ void 
DataAggregator::processMemEvents() { + } + } + ++std::error_code DataAggregator::parseLibkperfSamples() { ++ outs() << "PERF2BOLT: parsing libkperf data...\n"; ++ NamedRegionTimer T("parseLibkperfData", "Parsing libkperf data", ++ TimerGroupName, TimerGroupDesc, opts::TimeAggregator); ++ bool FirstLine = true; ++ while (hasData()) { ++ if (FirstLine) { ++ ErrorOr Event = parseString('\n'); ++ if (std::error_code EC = Event.getError()) ++ return EC; ++ EventNames.insert(Event.get()); ++ FirstLine = false; ++ } ++ ErrorOr KperfEntry = parseLibkperfDataEntry(); ++ if (std::error_code EC = KperfEntry.getError()) ++ return EC; ++ ++ BasicSamples[KperfEntry->Addr] += KperfEntry->Count; ++ } ++ ++ return std::error_code(); ++} ++ + std::error_code DataAggregator::parsePreAggregatedLBRSamples() { + outs() << "PERF2BOLT: parsing pre-aggregated profile...\n"; + NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events", +-- +2.39.5 (Apple Git-154) + diff --git a/llvm-bolt.spec b/llvm-bolt.spec index f917b04..3d73779 100644 --- a/llvm-bolt.spec +++ b/llvm-bolt.spec @@ -22,7 +22,7 @@ Name: %{pkg_name} Version: %{bolt_version} -Release: 11 +Release: 12 Summary: BOLT is a post-link optimizer developed to speed up large applications License: Apache 2.0 URL: https://github.com/llvm/llvm-project/tree/main/bolt @@ -40,6 +40,7 @@ Patch7: 0007-BOLT-Skip-PLT-search-for-zero-value-weak-reference-symbols. 
Patch8: 0008-merge-fdata-Support-process-no_lbr-profile-file.patch Patch9: 0009-support-aarch64-instrumentation.patch Patch10: 0010-AArch64-Add-hybrid-guess-approach-for-edge-weight-estimation.patch +Patch11: 0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch BuildRequires: gcc BuildRequires: gcc-c++ @@ -155,6 +156,12 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}/lib*.a %doc %{install_docdir} %changelog +* Tue Nov 19 2024 rfwang07 17.0.6-12 +- Type:Feature +- ID:NA +- SUG:NA +- DESC: support D-FOT addrs data parsing for optimized binary + * Mon Nov 18 2024 mwl2000 17.0.6-11 - Type:Feature - ID:NA -- Gitee