diff --git a/0003-Add-sum-aggregation-feature-for-cycles-event.patch b/0003-Add-sum-aggregation-feature-for-cycles-event.patch new file mode 100644 index 0000000000000000000000000000000000000000..9887955235f51c7add1d7cce8aa92bbdcbdd2e9b --- /dev/null +++ b/0003-Add-sum-aggregation-feature-for-cycles-event.patch @@ -0,0 +1,768 @@ +From d3c03a207734caadc4f91e1eb282d09aaa531aa0 Mon Sep 17 00:00:00 2001 +From: chenhong +Date: Sat, 8 Feb 2025 14:57:53 +0800 +Subject: [PATCH] Add sum aggregation feature for cycles event + +--- + instruction_map.cc | 25 +++++- + instruction_map.h | 3 +- + profile.cc | 88 ++++++++++++++----- + profile.h | 9 +- + profile_creator.cc | 17 +++- + profile_writer.cc | 37 +++++--- + profile_writer.h | 7 +- + sample_reader.cc | 12 +++ + sample_reader.h | 8 ++ + symbol_map.cc | 54 ++++++++++-- + symbol_map.h | 23 ++++- + .../src/quipper/perf_parser.cc | 2 +- + .../src/quipper/perf_parser.h | 7 ++ + .../src/quipper/perf_reader.cc | 6 ++ + .../src/quipper/perf_reader.h | 10 +++ + 15 files changed, 258 insertions(+), 50 deletions(-) + +diff --git a/instruction_map.cc b/instruction_map.cc +index a5111bd..00c506b 100644 +--- a/instruction_map.cc ++++ b/instruction_map.cc +@@ -12,12 +12,14 @@ + #include "addr2line.h" + #include "symbol_map.h" + #include "third_party/abseil/absl/strings/string_view.h" ++#include "third_party/abseil/absl/flags/flag.h" + + namespace devtools_crosstool_autofdo { + + void InstructionMap::BuildPerFunctionInstructionMap(absl::string_view name, + uint64_t start_addr, +- uint64_t end_addr) { ++ uint64_t end_addr, ++ bool is_repeat) { + if (start_addr >= end_addr) { + return; + } +@@ -33,8 +35,25 @@ void InstructionMap::BuildPerFunctionInstructionMap(absl::string_view name, + InstInfo *info = &inst_map_[addr - start_addr]; + addr2line_->GetInlineStack(addr, &info->source_stack); + if (!info->source_stack.empty()) { +- symbol_map_->AddSourceCount(name, info->source_stack, 0, 1, 1, +- SymbolMap::PERFDATA); ++ if 
(symbol_map_->GetProcessingEventName().empty()) { ++ // For perf sampling with single PMU event, even if we collect "cycles" ++ // info, we will not change operation from "MAX" to "SUM". ++ // "PERFDATA" and "AFDOPROTO" is "MAX", "AFDOPROFILE" is "SUM". ++ symbol_map_->AddSourceCount(name, info->source_stack, 0, 1, 1, ++ SymbolMap::PERFDATA, std::string(), is_repeat); ++ } else { ++ for (const auto& event_name : symbol_map_->GetProcessingEventName()) { ++ SymbolMap::DataSource data_source = SymbolMap::PERFDATA; ++ if (event_name.find("cycles") == 0 || absl::GetFlag(FLAGS_use_sum_aggr)) { ++ // When we collect "cycles" info by perf, switch aggregation ++ // operation from "MAX" to "SUM". ++ // "PERFDATA" and "AFDOPROTO" is "MAX", "AFDOPROFILE" is "SUM". ++ data_source = SymbolMap::AFDOPROFILE; ++ } ++ symbol_map_->AddSourceCount(name, info->source_stack, 0, 1, 1, ++ data_source, event_name, is_repeat); ++ } ++ } + } + } + } +diff --git a/instruction_map.h b/instruction_map.h +index 852d0fd..00bf752 100644 +--- a/instruction_map.h ++++ b/instruction_map.h +@@ -39,7 +39,8 @@ class InstructionMap { + + // Builds instruction map for a function. + void BuildPerFunctionInstructionMap(absl::string_view name, +- uint64_t start_addr, uint64_t end_addr); ++ uint64_t start_addr, uint64_t end_addr, ++ bool is_repeat); + + // Contains information about each instruction. 
+ struct InstInfo { +diff --git a/profile.cc b/profile.cc +index ac0ef84..9eef3ac 100644 +--- a/profile.cc ++++ b/profile.cc +@@ -38,16 +38,19 @@ Profile::ProfileMaps *Profile::GetProfileMaps(uint64_t addr) { + uint64_t start_addr, end_addr; + if (symbol_map_->GetSymbolInfoByAddr(addr, &name, + &start_addr, &end_addr)) { +- std::pair ret = +- symbol_profile_maps_.insert( +- SymbolProfileMaps::value_type(*name, nullptr)); +- if (ret.second) { +- ret.first->second = new ProfileMaps(start_addr, end_addr); ++ uint32_t count = symbol_profile_maps_.count(*name); ++ if (count > 0) { ++ auto range = symbol_profile_maps_.equal_range(*name); ++ for (auto it = range.first; it != range.second; ++it) { ++ if (it->second->start_addr == start_addr) { ++ return it->second; ++ } ++ } + } +- return ret.first->second; +- } else { +- return nullptr; ++ auto ret = symbol_profile_maps_.insert({*name, new ProfileMaps(start_addr, end_addr)}); ++ return ret->second; + } ++ return NULL; + } + + void Profile::AggregatePerFunctionProfile(bool check_lbr_entry) { +@@ -77,6 +80,18 @@ void Profile::AggregatePerFunctionProfile(bool check_lbr_entry) { + } + #endif // HAVE_LLVM + ++ /* annotate all event profile data from sample_reader_ */ ++ const EventAddressCountMap *event_addr_count = &sample_reader_->event_address_count_map(); ++ for (const auto &[event, addr_count] : *event_addr_count) { ++ for (const auto &[addr, count] : addr_count) { ++ uint64_t vaddr = symbol_map_->get_static_vaddr(addr); ++ ProfileMaps *maps = GetProfileMaps(vaddr); ++ if (maps != nullptr) { ++ maps->event_address_count_map[event][vaddr] += count; ++ } ++ } ++ } ++ + const RangeCountMap *range_map = &sample_reader_->range_count_map(); + for (const auto &[range, count] : *range_map) { + uint64_t beg_vaddr = symbol_map_->get_static_vaddr(range.first); +@@ -142,11 +157,12 @@ uint64_t Profile::ProfileMaps::GetAggregatedCount() const { + } + + void Profile::ProcessPerFunctionProfile(absl::string_view func_name, +- const 
ProfileMaps &maps) { ++ const ProfileMaps &maps, ++ bool is_repeat) { + InstructionMap inst_map(addr2line_, symbol_map_); + // LOG(INFO) << "ProcessPerFunctionProfile: " << func_name; + inst_map.BuildPerFunctionInstructionMap(func_name, maps.start_addr, +- maps.end_addr); ++ maps.end_addr, is_repeat); + // LOG(INFO) << "Built instruction map for func: " << func_name; + + AddressCountMap map; +@@ -167,17 +183,46 @@ void Profile::ProcessPerFunctionProfile(absl::string_view func_name, + map_ptr = &maps.address_count_map; + } + +- for (const auto &[address, count] : *map_ptr) { +- const InstructionMap::InstInfo *info = inst_map.lookup(address); +- if (info == nullptr) { +- continue; ++ // For add SourceCount for each events. ++ if (!maps.event_address_count_map.empty()) { ++ for (const auto &[event, addr_count] : maps.event_address_count_map) { ++ for (const auto &[addr, count] : addr_count) { ++ const InstructionMap::InstInfo *info = inst_map.lookup(addr); ++ if (info == nullptr) { ++ continue; ++ } ++ if (!info->source_stack.empty()) { ++ SymbolMap::DataSource data_source = SymbolMap::PERFDATA; ++ if (event.find("cycles") == 0 || absl::GetFlag(FLAGS_use_sum_aggr)) { ++ // When we collect "cycles" info by perf, switch aggregation ++ // operation from "MAX" to "SUM". ++ // "PERFDATA" and "AFDOPROTO" is "MAX", "AFDOPROFILE" is "SUM". 
++ data_source = SymbolMap::AFDOPROFILE; ++ } ++ symbol_map_->AddSourceCount( ++ func_name, info->source_stack, count, 0, ++ info->source_stack[0].DuplicationFactor(), ++ data_source, event, is_repeat); ++ } ++ } + } +- if (!info->source_stack.empty()) { +- // LOG(INFO) << "adding source count for func: " << func_name +- // << " address: " << address; +- symbol_map_->AddSourceCount(func_name, info->source_stack, count, 0, +- info->source_stack[0].DuplicationFactor(), +- SymbolMap::PERFDATA); ++ } else { ++ for (const auto &[addr, count] : *map_ptr) { ++ const InstructionMap::InstInfo *info = inst_map.lookup(addr); ++ if (info == nullptr) { ++ continue; ++ } ++ if (!info->source_stack.empty()) { ++ LOG(INFO) << "adding source count for func: " << func_name ++ << " address: " << addr; ++ // For perf sampling with single PMU event, even if we collect "cycles" ++ // info, we will not change operation from "MAX" to "SUM". ++ // "PERFDATA" and "AFDOPROTO" is "MAX", "AFDOPROFILE" is "SUM". 
++ symbol_map_->AddSourceCount( ++ func_name, info->source_stack, count, 0, ++ info->source_stack[0].DuplicationFactor(), ++ SymbolMap::PERFDATA, std::string(), is_repeat); ++ } + } + } + +@@ -254,7 +299,8 @@ void Profile::ComputeProfile(bool check_lbr_entry) { + const uint64_t count = + symbol_counts.at(symbol_map_->GetOriginalName(name)); + if (symbol_map_->ShouldEmit(count)) { +- ProcessPerFunctionProfile(name, *profile); ++ bool is_repeat = symbol_profile_maps_.count(name) > 1; ++ ProcessPerFunctionProfile(name, *profile, is_repeat); + } + } + symbol_map_->ElideSuffixesAndMerge(); +diff --git a/profile.h b/profile.h +index e590fa8..6216607 100644 +--- a/profile.h ++++ b/profile.h +@@ -12,7 +12,7 @@ + + #include "base/integral_types.h" + #include "sample_reader.h" +-#include "third_party/abseil/absl/container/node_hash_map.h" ++#include "third_party/abseil/absl/container/btree_map.h" + #include "third_party/abseil/absl/strings/string_view.h" + + namespace devtools_crosstool_autofdo { +@@ -59,8 +59,10 @@ class Profile { + AddressCountMap address_count_map; + RangeCountMap range_count_map; + BranchCountMap branch_count_map; ++ // Map events to Address count. ++ EventAddressCountMap event_address_count_map; + }; +- typedef absl::node_hash_map SymbolProfileMaps; ++ typedef absl::btree_multimap SymbolProfileMaps; + + // Returns the profile maps for a give function. + ProfileMaps *GetProfileMaps(uint64_t addr); +@@ -72,7 +74,8 @@ class Profile { + // 1. Traverses all instructions to build instruction map. + // 2. Unwinds the inline stack to add symbol count to each inlined symbol. 
+ void ProcessPerFunctionProfile(absl::string_view func_name, +- const ProfileMaps &map); ++ const ProfileMaps &map, ++ bool is_repeat); + + const SampleReader *sample_reader_; + const std::string binary_name_; +diff --git a/profile_creator.cc b/profile_creator.cc +index 378d8f8..db1342a 100644 +--- a/profile_creator.cc ++++ b/profile_creator.cc +@@ -152,7 +152,17 @@ bool ProfileCreator::CreateProfile(const std::string &input_profile_name, + } + #endif + +- return writer->WriteToFile(output_profile_name); ++ bool ret = true; ++ // event_name_set_ stores the target event names when more than one event is processed in this perf.data; ++ // we can get each event name from event_name_set_ and write each event to its own profile. ++ if (!symbol_map.GetProcessingEventName().empty()) { ++ for (const auto &event_name : symbol_map.GetProcessingEventName()) { ++ ret &= writer->WriteToFile(output_profile_name + "." + event_name, event_name); ++ } ++ } else { ++ ret &= writer->WriteToFile(output_profile_name); ++ } ++ return ret; + } + + bool ProfileCreator::ReadSample(absl::string_view input_profile_name, +@@ -215,6 +225,11 @@ bool ProfileCreator::ComputeProfile(SymbolMap *symbol_map, + bool check_lbr_entry) { + if (!CheckAndAssignAddr2Line(symbol_map, Addr2line::Create(binary_))) + return false; ++ // Record sampled event name string. ++ // TODO: add set method instead of using ref of local member. 
++ for (const auto &[event_name, _] : sample_reader_->event_address_count_map()) { ++ symbol_map->GetProcessingEventName().insert(event_name); ++ } + Profile profile(sample_reader_, binary_, symbol_map->get_addr2line(), + symbol_map); + profile.ComputeProfile(check_lbr_entry); +diff --git a/profile_writer.cc b/profile_writer.cc +index 875ed06..4acceae 100644 +--- a/profile_writer.cc ++++ b/profile_writer.cc +@@ -98,16 +98,30 @@ class SourceProfileWriter: public SymbolTraverser { + SourceProfileWriter(const SourceProfileWriter &) = delete; + SourceProfileWriter &operator=(const SourceProfileWriter &) = delete; + +- static void Write(const SymbolMap &symbol_map, const StringIndexMap &map) { +- SourceProfileWriter writer(map); ++ static void Write(const SymbolMap &symbol_map, const StringIndexMap &map, std::string target_event_name) { ++ // writer.Start() is a SymbolTraverser method, so we must record target_event_name in SourceProfileWriter. ++ SourceProfileWriter writer(map, target_event_name); + writer.Start(symbol_map); + } + + protected: + virtual void Visit(const Symbol *node) { +- gcov_write_unsigned(node->pos_counts.size()); ++ if (!target_event_name_.empty()) { ++ gcov_write_unsigned(node->event_pos_counts.count(target_event_name_) ? ++ node->event_pos_counts.find(target_event_name_)->second.size() : ++ 0); ++ } else { ++ gcov_write_unsigned(node->pos_counts.size()); ++ } + gcov_write_unsigned(node->callsites.size()); +- for (const auto &pos_count : node->pos_counts) { ++ // When event_pos_counts is empty, return to avoid accessing a nonexistent map. ++ if (!target_event_name_.empty() && !node->event_pos_counts.count(target_event_name_)) { ++ return; ++ } ++ const auto& pos_count_ref = !target_event_name_.empty() ? 
++ node->event_pos_counts.find(target_event_name_)->second : ++ node->pos_counts; ++ for (const auto &pos_count : pos_count_ref) { + uint64_t value = pos_count.first; + gcov_write_unsigned(SourceInfo::GenerateCompressedOffset(value)); + gcov_write_unsigned(pos_count.second.target_map.size()); +@@ -134,8 +148,10 @@ class SourceProfileWriter: public SymbolTraverser { + } + + private: +- explicit SourceProfileWriter(const StringIndexMap &map) : map_(map) {} +- ++ explicit SourceProfileWriter(const StringIndexMap &map, ++ const std::string target_event_name = std::string()) : ++ map_(map), ++ target_event_name_(target_event_name) {} + int GetStringIndex(const std::string &str) { + StringIndexMap::const_iterator ret = map_.find(str); + CHECK(ret != map_.end()); +@@ -143,9 +159,10 @@ class SourceProfileWriter: public SymbolTraverser { + } + + const StringIndexMap &map_; ++ std::string target_event_name_; // Record event name to distinguish different events. + }; + +-void AutoFDOProfileWriter::WriteFunctionProfile() { ++void AutoFDOProfileWriter::WriteFunctionProfile(std::string target_event_name) { + typedef std::map StringIndexMap; + // Map from a string to its index in this map. Providing a partial + // ordering of all output strings. 
+@@ -190,7 +207,7 @@ void AutoFDOProfileWriter::WriteFunctionProfile() { + gcov_write_unsigned(GCOV_TAG_AFDO_FUNCTION); + gcov_write_unsigned(length.length() + 1); + gcov_write_unsigned(length.num_functions()); +- SourceProfileWriter::Write(*symbol_map_, string_index_map); ++ SourceProfileWriter::Write(*symbol_map_, string_index_map, target_event_name); + } + + void AutoFDOProfileWriter::WriteModuleGroup() { +@@ -211,14 +228,14 @@ void AutoFDOProfileWriter::WriteWorkingSet() { + } + } + +-bool AutoFDOProfileWriter::WriteToFile(const std::string &output_filename) { ++bool AutoFDOProfileWriter::WriteToFile(const string &output_filename, string event_name) { + if (absl::GetFlag(FLAGS_debug_dump)) + Dump(); + + if (!WriteHeader(output_filename)) { + return false; + } +- WriteFunctionProfile(); ++ WriteFunctionProfile(event_name); + WriteModuleGroup(); + WriteWorkingSet(); + if (!WriteFinish()) { +diff --git a/profile_writer.h b/profile_writer.h +index 2557b08..5933917 100644 +--- a/profile_writer.h ++++ b/profile_writer.h +@@ -23,8 +23,9 @@ class ProfileWriter { + explicit ProfileWriter() : symbol_map_(nullptr) {} + virtual ~ProfileWriter() {} + +- virtual bool WriteToFile(const std::string &output_file) = 0; ++ virtual bool WriteToFile(const std::string &output_file, std::string event_name = std::string()) = 0; + void setSymbolMap(const SymbolMap *symbol_map) { symbol_map_ = symbol_map; } ++ const SymbolMap * getSymbolMap() const { return symbol_map_;} + void Dump(); + + protected: +@@ -39,7 +40,7 @@ class AutoFDOProfileWriter : public ProfileWriter { + explicit AutoFDOProfileWriter(uint32_t gcov_version) + : gcov_version_(gcov_version) {} + +- bool WriteToFile(const std::string &output_file) override; ++ bool WriteToFile(const string &output_file, std::string event_name = std::string()) override; + + private: + // Opens the output file, and writes the header. 
+@@ -87,7 +88,7 @@ class AutoFDOProfileWriter : public ProfileWriter { + // callsite_offset_2: symbol profile + // ... + // callsite_offset_num_callsites: symbol profile +- void WriteFunctionProfile(); ++ void WriteFunctionProfile(std::string target_event_name = std::string()); + + // Writes the module grouping info into the gcda file. + // TODO(b/132437226): LIPO has been deprecated so no module grouping info +diff --git a/sample_reader.cc b/sample_reader.cc +index f37be53..1c4816e 100644 +--- a/sample_reader.cc ++++ b/sample_reader.cc +@@ -30,6 +30,10 @@ ABSL_FLAG(uint64_t, strip_dup_backedge_stride_limit, 0x1000, + "Controls the limit of backedge stride hold by the heuristic " + "to strip duplicated entries in LBR stack. "); + ++ABSL_FLAG(uint32_t, sample_mapping_percentage_threshold, 95, ++ "when mapping perf sample events, at least this " ++ "percentage of them must be successfully mapped."); ++ + namespace devtools_crosstool_autofdo { + + PerfDataSampleReader::PerfDataSampleReader(absl::string_view profile_file, +@@ -269,6 +273,7 @@ bool TextSampleReaderWriter::IsFileExist() const { + bool PerfDataSampleReader::Append(const std::string &profile_file) { + quipper::PerfReader reader; + quipper::PerfParser parser(&reader); ++ parser.set_sample_mapping_percentage_threshold(absl::GetFlag(FLAGS_sample_mapping_percentage_threshold)); + if (!reader.ReadFile(profile_file) || !parser.ParseRawEvents()) { + return false; + } +@@ -296,6 +301,13 @@ bool PerfDataSampleReader::Append(const std::string &profile_file) { + } + if (MatchBinary(event.dso_and_offset)) { + address_count_map_[event.dso_and_offset.offset()]++; ++ // PMU events are processed here. If event_name is not empty, it means there are at least ++ // two perf events in this perf.data, so we record the sample in event_address_count_map_ for ++ // multiple-event support. 
++ auto &event_name = reader.GetEventNameFromId(event.event_ptr->sample_event().id()); ++ if (!event_name.empty()) { ++ event_address_count_map_[event_name][event.dso_and_offset.offset()]++; ++ } + } + int start_index = 0; + while (start_index < event.branch_stack.size() && +diff --git a/sample_reader.h b/sample_reader.h +index 09f057b..c097ddc 100644 +--- a/sample_reader.h ++++ b/sample_reader.h +@@ -31,6 +31,7 @@ typedef std::pair Range; + typedef std::map RangeCountMap; + typedef std::pair Branch; + typedef std::map BranchCountMap; ++typedef std::map EventAddressCountMap; + + // Reads in the profile data, and represent it in address_count_map_. + class SampleReader { +@@ -52,6 +53,10 @@ class SampleReader { + return branch_count_map_; + } + ++ const EventAddressCountMap &event_address_count_map() const { ++ return event_address_count_map_; ++ } ++ + std::set GetSampledAddresses() const; + + // Returns the sample count for a given instruction. +@@ -65,6 +70,7 @@ class SampleReader { + address_count_map_.clear(); + range_count_map_.clear(); + branch_count_map_.clear(); ++ event_address_count_map_.clear(); + } + // Returns true if the sample is from Linux kernel. + bool IsKernelSample() const { return is_kernel_; } +@@ -77,6 +83,8 @@ class SampleReader { + AddressCountMap address_count_map_; + RangeCountMap range_count_map_; + BranchCountMap branch_count_map_; ++ // Map each event and address count. ++ EventAddressCountMap event_address_count_map_; + + bool is_kernel_ = false; + }; +diff --git a/symbol_map.cc b/symbol_map.cc +index 2483835..828264b 100644 +--- a/symbol_map.cc ++++ b/symbol_map.cc +@@ -65,6 +65,10 @@ ABSL_FLAG(bool, use_base_only_in_fs_discriminator, false, + "Tell the symbol map to only use base discriminators in fsprofile."); + #endif + ++ABSL_FLAG(bool, use_sum_aggr, false, ++ "Choose to use 'sum' or 'max' aggregation to process events. " ++ "true is 'sum', false is 'max'."); ++ + namespace { + // Prints some blank space for indentation. 
+ void Indentation(int indent) { +@@ -515,13 +519,36 @@ void SymbolMap::AddSymbolEntryCount(absl::string_view symbol_name, + + Symbol *SymbolMap::TraverseInlineStack(absl::string_view symbol_name, + const SourceStack &src, uint64_t count, +- DataSource data_source) { ++ DataSource data_source, bool is_repeat) { + if (src.empty()) return nullptr; + bool use_discriminator_encoding = + absl::GetFlag(FLAGS_use_discriminator_encoding); +- Symbol *symbol = map_.find(symbol_name)->second; +- symbol->total_count += count; ++ Symbol *symbol = nullptr; + const SourceInfo &info = src[src.size() - 1]; ++ if (!is_repeat) { ++ symbol = map_.find(symbol_name)->second; ++ } else { ++ // First old map should be removed ++ if (auto it = map_.find(symbol_name); it != map_.end() && it->second != nullptr) { ++ delete it->second; ++ map_.erase(it); ++ } ++ // Second use #file_name#func_name as new symbol_name ++ std::string new_func_name = "#" + std::string(info.file_name) + ++ "#" + std::string(symbol_name); ++ if (!map_.count(new_func_name)) { ++ symbol = new Symbol(); ++ map_.insert({new_func_name, symbol}); ++ } else { ++ symbol = map_.find(new_func_name)->second; ++ } ++ } ++ if (symbol == nullptr) { ++ LOG(FATAL) << "Build symbol map error! 
file/func_name:" << info.file_name << "/" ++ << symbol_name; ++ } ++ ++ symbol->total_count += count; + if (symbol->info.file_name.empty() && !info.file_name.empty()) { + symbol->info.file_name = info.file_name; + symbol->info.dir_name = info.dir_name; +@@ -549,13 +576,14 @@ Symbol *SymbolMap::TraverseInlineStack(absl::string_view symbol_name, + void SymbolMap::AddSourceCount(absl::string_view symbol_name, + const SourceStack &src, uint64_t count, + uint64_t num_inst, uint32_t duplication, +- DataSource data_source) { ++ DataSource data_source, string event_name, ++ bool is_repeat) { + bool use_discriminator_encoding = + absl::GetFlag(FLAGS_use_discriminator_encoding); + if (duplication != 1 && + absl::GetFlag(FLAGS_use_discriminator_multiply_factor)) + count *= duplication; +- Symbol *symbol = TraverseInlineStack(symbol_name, src, count, data_source); ++ Symbol *symbol = TraverseInlineStack(symbol_name, src, count, data_source, is_repeat); + if (!symbol) return; + bool need_conversion = (data_source == PERFDATA || data_source == AFDOPROTO); + if (need_conversion && src[0].HasInvalidInfo()) return; +@@ -567,8 +595,18 @@ void SymbolMap::AddSourceCount(absl::string_view symbol_name, + if (count > symbol->pos_counts[offset].count) { + symbol->pos_counts[offset].count = count; + } ++ // symbol->pos_count is used for processing single event. If there is more ++ // than one processing event, event_name will not empty, so we need to record ++ // this sample in symbol->event_pos_counts. 
++ if (!event_name.empty() && ++ count > symbol->event_pos_counts[event_name][offset].count) { ++ symbol->event_pos_counts[event_name][offset].count = count; ++ } + } else { + symbol->pos_counts[offset].count += count; ++ if (!event_name.empty()) { ++ symbol->event_pos_counts[event_name][offset].count += count; ++ } + } + symbol->pos_counts[offset].num_inst += num_inst; + } +@@ -586,6 +624,12 @@ bool SymbolMap::AddIndirectCallTarget(absl::string_view symbol_name, + return false; + symbol->pos_counts[src[0].Offset(use_discriminator_encoding)] + .target_map[GetOriginalName(target)] = count; ++ if (!symbol->event_pos_counts.empty()) { ++ for (auto& pos_count : symbol->event_pos_counts) { ++ pos_count.second[src[0].Offset(use_discriminator_encoding)].target_map[ ++ GetOriginalName(target)] = count; ++ } ++ } + return true; + } + +diff --git a/symbol_map.h b/symbol_map.h +index 9e47211..60a6887 100644 +--- a/symbol_map.h ++++ b/symbol_map.h +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -45,6 +46,8 @@ ABSL_DECLARE_FLAG(bool, use_base_only_in_fs_discriminator); + + ABSL_DECLARE_FLAG(int, inline_instances_at_same_loc_cutoff); + ++ABSL_DECLARE_FLAG(bool, use_sum_aggr); ++ + namespace devtools_crosstool_autofdo { + + typedef absl::btree_map CallTargetCountMap; +@@ -92,6 +95,9 @@ typedef std::map SourceStackCountMap; + // Map from a source location (represented by offset+discriminator) to profile. + typedef std::map PositionCountMap; + ++// Map each event to its PositionCountMap. 
++typedef std::map EventPositionCountMap; ++ + struct Callsite { + uint64_t location; + const char *callee_name; +@@ -152,6 +158,7 @@ class Symbol { + total_count(0), + total_count_incl(0), + head_count(0), ++ event_pos_counts(), + callsites(0), + pos_counts() { + } +@@ -162,6 +169,7 @@ class Symbol { + total_count(src->total_count), + total_count_incl(src->total_count_incl), + head_count(src->head_count), ++ event_pos_counts(), + callsites(0), + pos_counts() { + info.func_name = new_func_name; +@@ -242,6 +250,8 @@ class Symbol { + CallsiteMap callsites; + // Map from source location to count and instruction number. + PositionCountMap pos_counts; ++ // Map from event name to PositionCountMap ++ EventPositionCountMap event_pos_counts; + }; + + // Vector of unique pointers to symbols. +@@ -438,7 +448,9 @@ class SymbolMap { + void AddSourceCount(absl::string_view symbol, const SourceStack &source, + uint64_t count, uint64_t num_inst, + uint32_t duplication = 1, +- DataSource data_source = AFDOPROFILE); ++ DataSource data_source = AFDOPROFILE, ++ string event_name = string(), ++ bool is_repeat = false); + + // Generates hybrid profiles by flattening callsites whose total counts are + // below the threshold, recursively. This is a fine-grained flattening +@@ -493,7 +505,8 @@ class SymbolMap { + // profile. + Symbol *TraverseInlineStack(absl::string_view symbol, + const SourceStack &source, uint64_t count, +- DataSource data_source = AFDOPROFILE); ++ DataSource data_source = AFDOPROFILE, ++ bool is_repeat = false); + + // Updates function name, start_addr, end_addr of a function that has a + // given address. Returns false if no such symbol exists. +@@ -544,6 +557,11 @@ class SymbolMap { + // Validates if the current symbol map is sane. + bool Validate() const; + ++ // Return a set of event name strings that appear in this perf.data. 
++ std::unordered_set &GetProcessingEventName() { ++ return event_name_set_; ++ } ++ + // For scenarios such as llc misses or profile-guided prefetching, we need to + // setup the symbol map such that the func_name appears to have samples. + // That data is ignored by readers. +@@ -618,6 +636,7 @@ class SymbolMap { + bool ignore_thresholds_; + uint8_t suffix_elision_policy_; + std::unique_ptr addr2line_; ++ std::unordered_set event_name_set_; + /* working_set_[i] stores # of instructions that consumes + i/NUM_GCOV_WORKING_SETS of total instruction counts. */ + gcov_working_set_info working_set_[NUM_GCOV_WORKING_SETS]; +diff --git a/third_party/perf_data_converter/src/quipper/perf_parser.cc b/third_party/perf_data_converter/src/quipper/perf_parser.cc +index 99e3b3a..7e78cb0 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_parser.cc ++++ b/third_party/perf_data_converter/src/quipper/perf_parser.cc +@@ -94,7 +94,7 @@ bool PerfParser::ParseRawEvents() { + } + parsed_events_.resize(write_index); + +- ProcessEvents(); ++ if (!ProcessEvents()) return false; + + if (!options_.discard_unused_events) return true; + +diff --git a/third_party/perf_data_converter/src/quipper/perf_parser.h b/third_party/perf_data_converter/src/quipper/perf_parser.h +index ed22426..e238a4f 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_parser.h ++++ b/third_party/perf_data_converter/src/quipper/perf_parser.h +@@ -192,6 +192,13 @@ class PerfParser { + + // Pass in a struct containing various options. + void set_options(const PerfParserOptions& options) { options_ = options; } ++ ++ // Set sample_mapping_percentage_threshold to val. ++ void set_sample_mapping_percentage_threshold(int32_t val) { ++ if (val >= 0 && val <= 100) { ++ options_.sample_mapping_percentage_threshold = (float)val; ++ } ++ } + + // Gets parsed event/sample info from raw event data. Stores pointers to the + // raw events in an array of ParsedEvents. Does not own the raw events. 
It is +diff --git a/third_party/perf_data_converter/src/quipper/perf_reader.cc b/third_party/perf_data_converter/src/quipper/perf_reader.cc +index 6a1850c..4a31eb5 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_reader.cc ++++ b/third_party/perf_data_converter/src/quipper/perf_reader.cc +@@ -1543,6 +1543,12 @@ bool PerfReader::ReadEventDescMetadata(DataReader* data) { + LOG(ERROR) << "Error reading ID value for attr #" << i; + return false; + } ++ // There may be multiple event id corresponding to the same event name, ++ // so save each {id, attr.name}. At the same time, this function ++ // only enters when there are multiple events in perf.data. ++ if (!event_id_name_map_.count(id)) { ++ event_id_name_map_.insert({id, attr.name}); ++ } + } + if (!AddPerfFileAttr(attr)) { + return false; +diff --git a/third_party/perf_data_converter/src/quipper/perf_reader.h b/third_party/perf_data_converter/src/quipper/perf_reader.h +index 392df07..b2a746e 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_reader.h ++++ b/third_party/perf_data_converter/src/quipper/perf_reader.h +@@ -139,6 +139,13 @@ class PerfReader { + const PerfDataProto_StringMetadata& string_metadata() const { + return proto_->string_metadata(); + } ++ ++ const std::string GetEventNameFromId(uint64_t id) const { ++ if (event_id_name_map_.count(id)) { ++ return event_id_name_map_.find(id)->second; ++ } ++ return std::string(); ++ } + + uint64_t metadata_mask() const { return proto_->metadata_mask().Get(0); } + +@@ -360,6 +367,9 @@ class PerfReader { + // file header, which may differ from the input file header, if any. 
+ struct perf_file_header out_header_; + ++ // Map for record event id to name string ++ std::map event_id_name_map_; ++ + // For build-id embedded in MMAP2 records + std::unordered_set filenames_with_build_id_; + +-- +2.27.0 + diff --git a/autofdo.spec b/autofdo.spec index eedc945c167b85e2aab6aa4e548d12d6373b8588..d0e45f7949c380fb809244c07857acbe14b9f120 100644 --- a/autofdo.spec +++ b/autofdo.spec @@ -1,6 +1,6 @@ Name: autofdo Version: 0.30.1 -Release: 3 +Release: 4 Summary: A tool to convert perf.data profile to AutoFDO profile License: Apache-2.0 URL: https://github.com/google/autofdo @@ -10,6 +10,7 @@ Source0: %{name}-%{version}.tar.xz Source1: abseil.tar.xz Patch1: 0001-adjust-the-link-method-of-dependency-libraries.patch Patch2: 0002-unscaledcycleclock-remove-RISC-V-support.patch +Patch3: 0003-Add-sum-aggregation-feature-for-cycles-event.patch BuildRequires: gcc gcc-c++ libtool git cmake elfutils-libelf-devel openssl-devel pkg-config ninja-build gtest libunwind-devel protobuf-devel chrpath Requires: glibc openssl-libs elfutils libgcc libstdc++ zlib @@ -40,7 +41,13 @@ cp %{_builddir}/%{name}-%{version}/profile_merger ./ %{_bindir}/profile_merger %changelog -* Wed Feb 12 2025 Zhenyu Zhao - 0.30.1-3 +* Tue Feb 11 2025 chenhong - 0.30.1-4 +- Type: enhancement +- ID: NA +- SUG: NA +- DESC: Add sum aggregation feature for cycles event + +* Tue Feb 11 2025 Zhenyu Zhao - 0.30.1-3 - Type:update - ID:NA - SUG:NA