From 48308a854b4c2c4be1ae9b7a7d54f59595ecf4c9 Mon Sep 17 00:00:00 2001 From: liyancheng <412998149@qq.com> Date: Sun, 13 Feb 2022 14:58:14 +0800 Subject: [PATCH 1/2] [AutoFDO] Multiple event processing support 1. Support parser multiple types of events in one perf file, distinguishes different event with suffix. 2. Support process functions with same name, distinguished by #file_name#func_name. (cherry picked from commit f1174d86a3aca3836adb2bff5e2f7a792b2cd3ed) --- 0001-Multi-event-processing-support.patch | 697 ++++++++++++++++++++++ autofdo.spec | 17 +- 2 files changed, 711 insertions(+), 3 deletions(-) create mode 100644 0001-Multi-event-processing-support.patch diff --git a/0001-Multi-event-processing-support.patch b/0001-Multi-event-processing-support.patch new file mode 100644 index 0000000..46a4033 --- /dev/null +++ b/0001-Multi-event-processing-support.patch @@ -0,0 +1,697 @@ +diff --git a/instruction_map.cc b/instruction_map.cc +index 9342307..f460d67 100644 +--- a/instruction_map.cc ++++ b/instruction_map.cc +@@ -29,7 +29,7 @@ InstructionMap::~InstructionMap() { + } + + void InstructionMap::BuildPerFunctionInstructionMap( +- const string &name, uint64 start_addr, uint64 end_addr) { ++ const string &name, uint64 start_addr, uint64 end_addr, bool is_repeat) { + if (start_addr >= end_addr) { + return; + } +@@ -39,7 +39,11 @@ void InstructionMap::BuildPerFunctionInstructionMap( + inst_map_.insert(InstMap::value_type(addr, info)); + if (info->source_stack.size() > 0) { + symbol_map_->AddSourceCount(name, info->source_stack, 0, 1, +- SymbolMap::MAX); ++ SymbolMap::MAX, std::string(), is_repeat); ++ for (const auto& event_name : symbol_map_->GetProcessingEventName()) { ++ symbol_map_->AddSourceCount(name, info->source_stack, 0, 1, ++ SymbolMap::MAX, event_name, is_repeat); ++ } + } + } + } +diff --git a/instruction_map.h b/instruction_map.h +index 6a0a2bc..26287ee 100644 +--- a/instruction_map.h ++++ b/instruction_map.h +@@ -53,8 +53,8 @@ class InstructionMap { 
+ } + + // Builds instruction map for a function. +- void BuildPerFunctionInstructionMap(const string &name, uint64 start_addr, +- uint64 end_addr); ++ void BuildPerFunctionInstructionMap(const std::string &name, ++ uint64 start_addr, uint64 end_addr, bool is_repeat); + + // Contains information about each instruction. + struct InstInfo { +diff --git a/profile.cc b/profile.cc +index 95005f6..a306562 100644 +--- a/profile.cc ++++ b/profile.cc +@@ -35,15 +35,19 @@ Profile::ProfileMaps *Profile::GetProfileMaps(uint64 addr) { + uint64 start_addr, end_addr; + if (symbol_map_->GetSymbolInfoByAddr(addr, &name, + &start_addr, &end_addr)) { +- std::pair ret = +- symbol_profile_maps_.insert(SymbolProfileMaps::value_type(*name, NULL)); +- if (ret.second) { +- ret.first->second = new ProfileMaps(start_addr, end_addr); ++ uint32_t count = symbol_profile_maps_.count(*name); ++ if (count > 0) { ++ auto range = symbol_profile_maps_.equal_range(*name); ++ for (auto it = range.first; it != range.second; ++it) { ++ if (it->second->start_addr == start_addr) { ++ return it->second; ++ } ++ } + } +- return ret.first->second; +- } else { +- return NULL; ++ auto ret = symbol_profile_maps_.insert({*name, new ProfileMaps(start_addr, end_addr)}); ++ return ret->second; + } ++ return NULL; + } + + void Profile::AggregatePerFunctionProfile() { +@@ -55,6 +59,16 @@ void Profile::AggregatePerFunctionProfile() { + maps->address_count_map[addr_count.first + start] += addr_count.second; + } + } ++ /* annotate all event profile data from sample_reader_ */ ++ for (const auto &event_addr_count : sample_reader_->event_address_count_map()) { ++ for (const auto &addr_count : event_addr_count.second) { ++ ProfileMaps *maps = GetProfileMaps(addr_count.first + start); ++ if (maps != nullptr) { ++ maps->event_address_count_map[event_addr_count.first][addr_count.first + start] += ++ event_addr_count.second.at(addr_count.first); ++ } ++ } ++ } + const RangeCountMap *range_map = 
&sample_reader_->range_count_map(); + for (const auto &range_count : *range_map) { + ProfileMaps *maps = GetProfileMaps(range_count.first.first + start); +@@ -92,10 +106,11 @@ uint64 Profile::ProfileMaps::GetAggregatedCount() const { + } + + void Profile::ProcessPerFunctionProfile(string func_name, +- const ProfileMaps &maps) { ++ const ProfileMaps &maps, ++ bool is_repeat) { + InstructionMap inst_map(addr2line_, symbol_map_); + inst_map.BuildPerFunctionInstructionMap(func_name, maps.start_addr, +- maps.end_addr); ++ maps.end_addr, is_repeat); + + AddressCountMap map; + const AddressCountMap *map_ptr; +@@ -117,21 +132,44 @@ void Profile::ProcessPerFunctionProfile(string func_name, + map_ptr = &maps.address_count_map; + } + +- for (const auto &address_count : *map_ptr) { +- InstructionMap::InstMap::const_iterator iter = +- inst_map.inst_map().find(address_count.first); +- if (iter == inst_map.inst_map().end()) { +- continue; +- } +- const InstructionMap::InstInfo *info = iter->second; +- if (info == NULL) { +- continue; ++ // For add SourceCount for each events. 
++ if (!maps.event_address_count_map.empty()) { ++ for (const auto &event_address : maps.event_address_count_map) { ++ for (const auto &address_count : event_address.second) { ++ InstructionMap::InstMap::const_iterator iter = ++ inst_map.inst_map().find(address_count.first); ++ if (iter == inst_map.inst_map().end()) { ++ continue; ++ } ++ const InstructionMap::InstInfo *info = iter->second; ++ if (info == nullptr) { ++ continue; ++ } ++ if (info->source_stack.size() > 0) { ++ symbol_map_->AddSourceCount( ++ func_name, info->source_stack, ++ address_count.second * info->source_stack[0].DuplicationFactor(), 0, ++ SymbolMap::MAX, event_address.first, is_repeat); ++ } ++ } + } +- if (info->source_stack.size() > 0) { +- symbol_map_->AddSourceCount( +- func_name, info->source_stack, +- address_count.second * info->source_stack[0].DuplicationFactor(), 0, +- SymbolMap::MAX); ++ } else { ++ for (const auto &address_count : *map_ptr) { ++ InstructionMap::InstMap::const_iterator iter = ++ inst_map.inst_map().find(address_count.first); ++ if (iter == inst_map.inst_map().end()) { ++ continue; ++ } ++ const InstructionMap::InstInfo *info = iter->second; ++ if (info == NULL) { ++ continue; ++ } ++ if (info->source_stack.size() > 0) { ++ symbol_map_->AddSourceCount( ++ func_name, info->source_stack, ++ address_count.second * info->source_stack[0].DuplicationFactor(), 0, ++ SymbolMap::MAX, std::string(), is_repeat); ++ } + } + } + +@@ -178,8 +216,10 @@ void Profile::ComputeProfile() { + + // Traverse the symbol map to process the profiles. 
+ for (const auto &symbol_profile : symbol_profile_maps_) { +- if (symbol_map_->ShouldEmit(symbol_profile.second->GetAggregatedCount())) +- ProcessPerFunctionProfile(symbol_profile.first, *symbol_profile.second); ++ if (symbol_map_->ShouldEmit(symbol_profile.second->GetAggregatedCount())) { ++ bool is_repeat = symbol_profile_maps_.count(symbol_profile.first) > 1; ++ ProcessPerFunctionProfile(symbol_profile.first, *symbol_profile.second, is_repeat); ++ } + } + symbol_map_->Merge(); + symbol_map_->ComputeWorkingSets(); +diff --git a/profile.h b/profile.h +index 810fb02..e851783 100644 +--- a/profile.h ++++ b/profile.h +@@ -62,8 +62,10 @@ class Profile { + AddressCountMap address_count_map; + RangeCountMap range_count_map; + BranchCountMap branch_count_map; ++ // Map events to Address count. ++ EventAddressCountMap event_address_count_map; + }; +- typedef map SymbolProfileMaps; ++ typedef std::multimap SymbolProfileMaps; + + // Returns the profile maps for a give function. + ProfileMaps *GetProfileMaps(uint64 addr); +@@ -74,7 +76,7 @@ class Profile { + // Builds function level profile for specified function: + // 1. Traverses all instructions to build instruction map. + // 2. Unwinds the inline stack to add symbol count to each inlined symbol. 
+- void ProcessPerFunctionProfile(string func_name, const ProfileMaps &map); ++ void ProcessPerFunctionProfile(string func_name, const ProfileMaps &map, bool is_repeat); + + const SampleReader *sample_reader_; + const string binary_name_; +diff --git a/profile_creator.cc b/profile_creator.cc +index 80b0288..0189310 100644 +--- a/profile_creator.cc ++++ b/profile_creator.cc +@@ -110,7 +110,16 @@ bool ProfileCreator::CreateProfile(const string &input_profile_name, + if (!ReadSample(input_profile_name, profiler)) return false; + if (!ComputeProfile(&symbol_map)) return false; + } +- bool ret = writer->WriteToFile(output_profile_name); ++ bool ret = true; ++ // The event_name_set stores target event name when processing more than one event in this perf.data, ++ // we can get the event name from event_name_set_ and write each event to it`s own profile. ++ if (!symbol_map.GetProcessingEventName().empty()) { ++ for (const auto &event_name : symbol_map.GetProcessingEventName()) { ++ ret &= writer->WriteToFile(output_profile_name + "." + event_name, event_name); ++ } ++ } else { ++ ret &= writer->WriteToFile(output_profile_name); ++ } + return ret; + } + +@@ -148,6 +157,11 @@ bool ProfileCreator::ComputeProfile(SymbolMap *symbol_map) { + symbol_map, + Addr2line::CreateWithSampledFunctions(binary_, &sampled_functions))) + return false; ++ // Record sampled event name string. ++ // TODO: add set method instead of useing ref of local member. 
++ for (const auto &event_name : sample_reader_->event_address_count_map()) { ++ symbol_map->GetProcessingEventName().insert(event_name.first); ++ } + Profile profile(sample_reader_, binary_, symbol_map->get_addr2line(), + symbol_map); + profile.ComputeProfile(); +diff --git a/profile_writer.cc b/profile_writer.cc +index e9b9893..b6f888a 100644 +--- a/profile_writer.cc ++++ b/profile_writer.cc +@@ -102,16 +102,30 @@ class SourceProfileLengther: public SymbolTraverser { + + class SourceProfileWriter: public SymbolTraverser { + public: +- static void Write(const SymbolMap &symbol_map, const StringIndexMap &map) { +- SourceProfileWriter writer(map); ++ static void Write(const SymbolMap &symbol_map, const StringIndexMap &map, std::string target_event_name) { ++ // writer.Start() is a SymbolTraverser method, so we must record target_event_name in SourceProfileWriter. ++ SourceProfileWriter writer(map, target_event_name); + writer.Start(symbol_map); + } + + protected: + virtual void Visit(const Symbol *node) { +- gcov_write_unsigned(node->pos_counts.size()); ++ if (!target_event_name_.empty()) { ++ gcov_write_unsigned(node->event_pos_counts.count(target_event_name_) ? ++ node->event_pos_counts.find(target_event_name_)->second.size() : ++ 0); ++ } else { ++ gcov_write_unsigned(node->pos_counts.size()); ++ } + gcov_write_unsigned(node->callsites.size()); +- for (const auto &pos_count : node->pos_counts) { ++ // When event_pos_counts is empty, return to avoid accessing a nonexistent map. ++ if (!target_event_name_.empty() && !node->event_pos_counts.count(target_event_name_)) { ++ return; ++ } ++ const auto& pos_count_ref = !target_event_name_.empty() ? 
++ node->event_pos_counts.find(target_event_name_)->second : ++ node->pos_counts; ++ for (const auto &pos_count : pos_count_ref) { + gcov_write_unsigned(pos_count.first); + gcov_write_unsigned(pos_count.second.target_map.size()); + gcov_write_counter(pos_count.second.count); +@@ -136,7 +150,10 @@ class SourceProfileWriter: public SymbolTraverser { + } + + private: +- explicit SourceProfileWriter(const StringIndexMap &map) : map_(map) {} ++ explicit SourceProfileWriter(const StringIndexMap &map, ++ const std::string target_event_name = std::string()) : ++ map_(map), ++ target_event_name_(target_event_name) {} + + int GetStringIndex(const string &str) { + StringIndexMap::const_iterator ret = map_.find(str); +@@ -145,10 +162,11 @@ class SourceProfileWriter: public SymbolTraverser { + } + + const StringIndexMap &map_; ++ std::string target_event_name_; // Record event name to distinguish different events. + DISALLOW_COPY_AND_ASSIGN(SourceProfileWriter); + }; + +-void AutoFDOProfileWriter::WriteFunctionProfile() { ++void AutoFDOProfileWriter::WriteFunctionProfile(std::string target_event_name) { + typedef std::map StringIndexMap; + // Map from a string to its index in this map. Providing a partial + // ordering of all output strings. 
+@@ -193,7 +211,7 @@ void AutoFDOProfileWriter::WriteFunctionProfile() { + gcov_write_unsigned(GCOV_TAG_AFDO_FUNCTION); + gcov_write_unsigned(length.length() + 1); + gcov_write_unsigned(length.num_functions()); +- SourceProfileWriter::Write(*symbol_map_, string_index_map); ++ SourceProfileWriter::Write(*symbol_map_, string_index_map, target_event_name); + } + + void AutoFDOProfileWriter::WriteModuleGroup() { +@@ -327,11 +345,11 @@ void AutoFDOProfileWriter::WriteWorkingSet() { + } + } + +-bool AutoFDOProfileWriter::WriteToFile(const string &output_filename) { ++bool AutoFDOProfileWriter::WriteToFile(const string &output_filename, string event_name) { + if (!WriteHeader(output_filename, output_filename + ".imports")) { + return false; + } +- WriteFunctionProfile(); ++ WriteFunctionProfile(event_name); + WriteModuleGroup(); + WriteWorkingSet(); + if (!WriteFinish()) { +diff --git a/profile_writer.h b/profile_writer.h +index f7d27e0..e1a4137 100644 +--- a/profile_writer.h ++++ b/profile_writer.h +@@ -32,9 +32,10 @@ class ProfileWriter { + explicit ProfileWriter() : symbol_map_(nullptr), module_map_(nullptr) {} + virtual ~ProfileWriter() {} + +- virtual bool WriteToFile(const string &output_file) = 0; ++ virtual bool WriteToFile(const std::string &output_file, std::string event_name = std::string()) = 0; + void setSymbolMap(const SymbolMap *symbol_map) { symbol_map_ = symbol_map; } + void setModuleMap(const ModuleMap *module_map) { module_map_ = module_map; } ++ const SymbolMap * getSymbolMap() const { return symbol_map_;} + void Dump(); + + protected: +@@ -52,7 +53,7 @@ class AutoFDOProfileWriter : public ProfileWriter { + explicit AutoFDOProfileWriter(uint32 gcov_version) + : gcov_version_(gcov_version) {} + +- bool WriteToFile(const string &output_file) override; ++ bool WriteToFile(const string &output_file, std::string event_name = std::string()) override; + + private: + // Opens the output file, and writes the header. 
+@@ -100,7 +101,7 @@ class AutoFDOProfileWriter : public ProfileWriter { + // callsite_offset_2: symbol profile + // ... + // callsite_offset_num_callsites: symbol profile +- void WriteFunctionProfile(); ++ void WriteFunctionProfile(std::string target_event_name = std::string()); + + // Writes the module grouping info into the gcda file. This is stored + // under the section tagged GCOV_TAG_MODULE_GROUPING: +diff --git a/sample_reader.cc b/sample_reader.cc +index 3071582..43a03cb 100644 +--- a/sample_reader.cc ++++ b/sample_reader.cc +@@ -19,9 +19,13 @@ + #include + #include + ++#include "gflags/gflags.h" + #include "base/logging.h" + #include "third_party/perf_data_converter/src/quipper/perf_parser.h" + ++DEFINE_int32(sample_mapping_percentage_threshold, 95, ++ "when mapping perf sample events, at least this " ++ "percentage of them must be successfully mapped."); + namespace { + // Returns true if name equals full_name, or full_name is empty and name + // matches re. +@@ -201,6 +205,7 @@ bool TextSampleReaderWriter::IsFileExist() const { + bool PerfDataSampleReader::Append(const string &profile_file) { + quipper::PerfReader reader; + quipper::PerfParser parser(&reader); ++ parser.set_sample_mapping_percentage_threshold(FLAGS_sample_mapping_percentage_threshold); + if (!reader.ReadFile(profile_file) || !parser.ParseRawEvents()) { + return false; + } +@@ -217,6 +222,13 @@ bool PerfDataSampleReader::Append(const string &profile_file) { + } + if (MatchBinary(event.dso_and_offset.dso_name(), focus_binary)) { + address_count_map_[event.dso_and_offset.offset()]++; ++ // pmu event should be processed here, if event_name is not empty, it means there is at least ++ // two perf event in this perf.data, so we should record it into event_address_count_map_ for ++ // multiply event support. 
++ auto &event_name = reader.GetEventNameFromId(event.event_ptr->sample_event().id()); ++ if (!event_name.empty()) { ++ event_address_count_map_[event_name][event.dso_and_offset.offset()]++; ++ } + } + if (event.branch_stack.size() > 0 && + MatchBinary(event.branch_stack[0].to.dso_name(), focus_binary) && +diff --git a/sample_reader.h b/sample_reader.h +index 6932118..846f741 100644 +--- a/sample_reader.h ++++ b/sample_reader.h +@@ -38,6 +38,7 @@ typedef pair Range; + typedef map RangeCountMap; + typedef pair Branch; + typedef map BranchCountMap; ++typedef map EventAddressCountMap; + + // Reads in the profile data, and represent it in address_count_map_. + class SampleReader { +@@ -59,6 +60,10 @@ class SampleReader { + return branch_count_map_; + } + ++ const EventAddressCountMap &event_address_count_map() const { ++ return event_address_count_map_; ++ } ++ + set GetSampledAddresses() const; + + // Returns the sample count for a given instruction. +@@ -74,6 +79,7 @@ class SampleReader { + address_count_map_.clear(); + range_count_map_.clear(); + branch_count_map_.clear(); ++ event_address_count_map_.clear(); + } + + protected: +@@ -84,6 +90,8 @@ class SampleReader { + AddressCountMap address_count_map_; + RangeCountMap range_count_map_; + BranchCountMap branch_count_map_; ++ // Map each event and address count. ++ EventAddressCountMap event_address_count_map_; + }; + + // Base class that reads in the profile from a sample data file. 
+diff --git a/symbol_map.cc b/symbol_map.cc +index ed048bf..233bbb2 100644 +--- a/symbol_map.cc ++++ b/symbol_map.cc +@@ -326,11 +326,34 @@ void SymbolMap::AddSymbolEntryCount(const string &symbol_name, uint64 count) { + + Symbol *SymbolMap::TraverseInlineStack(const string &symbol_name, + const SourceStack &src, +- uint64 count) { ++ uint64 count, bool is_repeat) { + if (src.empty()) return nullptr; +- Symbol *symbol = map_.find(symbol_name)->second; +- symbol->total_count += count; ++ Symbol *symbol = nullptr; + const SourceInfo &info = src[src.size() - 1]; ++ if (!is_repeat) { ++ symbol = map_.find(symbol_name)->second; ++ } else { ++ // First old map should be removed ++ if (auto it = map_.find(symbol_name); it != map_.end() && it->second != nullptr) { ++ delete it->second; ++ map_.erase(it); ++ } ++ // Second use #file_name#func_name as new symbol_name ++ std::string new_func_name = "#" + std::string(info.file_name) + ++ "#" + symbol_name; ++ if (!map_.count(new_func_name)) { ++ symbol = new Symbol(); ++ map_.insert({new_func_name, symbol}); ++ } else { ++ symbol = map_.find(new_func_name)->second; ++ } ++ } ++ if (symbol == nullptr) { ++ LOG(FATAL) << "Build symbol map error! 
file/func_name:" << info.file_name << "/" ++ << symbol_name; ++ } ++ ++ symbol->total_count += count; + if (symbol->info.file_name == NULL && info.file_name != NULL) { + symbol->info.file_name = info.file_name; + symbol->info.dir_name = info.dir_name; +@@ -356,8 +379,9 @@ Symbol *SymbolMap::TraverseInlineStack(const string &symbol_name, + void SymbolMap::AddSourceCount(const string &symbol_name, + const SourceStack &src, + uint64 count, uint64 num_inst, +- Operation op) { +- Symbol *symbol = TraverseInlineStack(symbol_name, src, count); ++ Operation op, string event_name, ++ bool is_repeat) { ++ Symbol *symbol = TraverseInlineStack(symbol_name, src, count, is_repeat); + if (!symbol) return; + + uint32 offset = src[0].Offset(use_discriminator_encoding_); +@@ -365,6 +389,13 @@ void SymbolMap::AddSourceCount(const string &symbol_name, + if (count > symbol->pos_counts[offset].count) { + symbol->pos_counts[offset].count = count; + } ++ // symbol->pos_count is used for processing single event. If there is more ++ // than one processing event, event_name will not empty, so we need to record ++ // this sample in symbol->event_pos_counts. 
++ if (!event_name.empty() && ++ count > symbol->event_pos_counts[event_name][offset].count) { ++ symbol->event_pos_counts[event_name][offset].count = count; ++ } + } else if (op == SUM) { + symbol->pos_counts[offset].count += count; + } else { +@@ -382,6 +413,12 @@ bool SymbolMap::AddIndirectCallTarget(const string &symbol_name, + if (!symbol) return false; + symbol->pos_counts[src[0].Offset(use_discriminator_encoding_)].target_map[ + GetOriginalName(target.c_str())] = count; ++ if (!symbol->event_pos_counts.empty()) { ++ for (auto& pos_count : symbol->event_pos_counts) { ++ pos_count.second[src[0].Offset(use_discriminator_encoding_)].target_map[ ++ GetOriginalName(target.c_str())] = count; ++ } ++ } + return true; + } + +diff --git a/symbol_map.h b/symbol_map.h +index fc3f470..1c73ae5 100644 +--- a/symbol_map.h ++++ b/symbol_map.h +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -82,6 +83,9 @@ typedef std::map PositionCountMap; + // callsite_location, callee_name + typedef std::pair Callsite; + ++// Map each event to its PositionCountMap. ++typedef std::map EventPositionCountMap; ++ + struct CallsiteLess { + bool operator()(const Callsite& c1, const Callsite& c2) const { + if (c1.first != c2.first) +@@ -108,12 +112,12 @@ class Symbol { + // This constructor is used to create inlined symbol. + Symbol(const char *name, const char *dir, const char *file, uint32 start) + : info(SourceInfo(name, dir, file, start, 0, 0)), +- total_count(0), head_count(0) {} ++ total_count(0), head_count(0), event_pos_counts() {} + + // This constructor is used to create aliased symbol. + Symbol(const Symbol *src, const char *new_func_name) + : info(src->info), total_count(src->total_count), +- head_count(src->head_count) { ++ head_count(src->head_count), event_pos_counts() { + info.func_name = new_func_name; + } + +@@ -155,6 +159,8 @@ class Symbol { + CallsiteMap callsites; + // Map from source location to count and instruction number. 
+ PositionCountMap pos_counts; ++ // Map from event name to PositionCountMap ++ EventPositionCountMap event_pos_counts; + }; + + // Maps function name to actual symbol. (Top level map). +@@ -288,7 +294,9 @@ class SymbolMap { + // num_inst: number of instructions that is mapped to the source. + // op: operation used to calculate count (SUM or MAX). + void AddSourceCount(const string &symbol, const SourceStack &source, +- uint64 count, uint64 num_inst, Operation op); ++ uint64 count, uint64 num_inst, Operation op, ++ string event_name = string(), ++ bool is_repeat = false); + + // Adds the indirect call target to source stack. + // symbol: name of the symbol in which source is located. +@@ -303,7 +311,7 @@ class SymbolMap { + // count to the total count in the inlined symbol. Returns the leaf symbol. If + // the inline stack is empty, returns nullptr without any other updates. + Symbol *TraverseInlineStack(const string &symbol, const SourceStack &source, +- uint64 count); ++ uint64 count, bool is_repeat = false); + + // Updates function name, start_addr, end_addr of a function that has a + // given address. Returns false if no such symbol exists. +@@ -363,6 +371,11 @@ class SymbolMap { + // Validates if the current symbol map is sane. + bool Validate() const; + ++ // Return a set of event name strings that appear in this perf.data. ++ std::unordered_set &GetProcessingEventName() { ++ return event_name_set_; ++ } ++ + private: + // Reads from the binary's elf section to build the symbol map. + void BuildSymbolMap(); +@@ -384,6 +397,7 @@ class SymbolMap { + bool use_discriminator_encoding_; + bool ignore_thresholds_; + std::unique_ptr addr2line_; ++ std::unordered_set event_name_set_; + /* working_set_[i] stores # of instructions that consumes + i/NUM_GCOV_WORKING_SETS of total instruction counts. 
*/ + gcov_working_set_info working_set_[NUM_GCOV_WORKING_SETS]; +diff --git a/third_party/perf_data_converter/src/quipper/perf_parser.cc b/third_party/perf_data_converter/src/quipper/perf_parser.cc +index 0beef43..6755e5c 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_parser.cc ++++ b/third_party/perf_data_converter/src/quipper/perf_parser.cc +@@ -98,7 +98,7 @@ bool PerfParser::ParseRawEvents() { + } + parsed_events_.resize(write_index); + +- ProcessEvents(); ++ if (!ProcessEvents()) return false; + + if (!options_.discard_unused_events) return true; + +diff --git a/third_party/perf_data_converter/src/quipper/perf_parser.h b/third_party/perf_data_converter/src/quipper/perf_parser.h +index 0910490..2ec734e 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_parser.h ++++ b/third_party/perf_data_converter/src/quipper/perf_parser.h +@@ -169,6 +169,13 @@ class PerfParser { + // Pass in a struct containing various options. + void set_options(const PerfParserOptions& options) { options_ = options; } + ++ // Set sample_mapping_percentage_threshold to val. ++ void set_sample_mapping_percentage_threshold(int32_t val) { ++ if (val >= 0 && val <= 100) { ++ options_.sample_mapping_percentage_threshold = (float)val; ++ } ++ } ++ + // Gets parsed event/sample info from raw event data. Stores pointers to the + // raw events in an array of ParsedEvents. Does not own the raw events. 
It is + // up to the user of this class to keep track of when these event pointers are +diff --git a/third_party/perf_data_converter/src/quipper/perf_reader.cc b/third_party/perf_data_converter/src/quipper/perf_reader.cc +index 7397db6..3012cd9 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_reader.cc ++++ b/third_party/perf_data_converter/src/quipper/perf_reader.cc +@@ -1073,6 +1073,12 @@ bool PerfReader::ReadEventDescMetadata(DataReader* data) { + LOG(ERROR) << "Error reading ID value for attr #" << i; + return false; + } ++ // There may be multiple event id corresponding to the same event name, ++ // so save each {id, attr.name}. At the same time, this function ++ // only enters when there are multiple events in perf.data. ++ if (!event_id_name_map_.count(id)) { ++ event_id_name_map_.insert({id, attr.name}); ++ } + } + AddPerfFileAttr(attr); + // The EVENT_DESC metadata is the newer replacement for the older event type +diff --git a/third_party/perf_data_converter/src/quipper/perf_reader.h b/third_party/perf_data_converter/src/quipper/perf_reader.h +index 7d91f99..3044a5a 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_reader.h ++++ b/third_party/perf_data_converter/src/quipper/perf_reader.h +@@ -130,6 +130,13 @@ class PerfReader { + return proto_->string_metadata(); + } + ++ const string GetEventNameFromId(uint64_t id) const { ++ if (event_id_name_map_.count(id)) { ++ return event_id_name_map_.find(id)->second; ++ } ++ return std::string(); ++ } ++ + uint64_t metadata_mask() const { return proto_->metadata_mask().Get(0); } + + private: +@@ -304,6 +311,9 @@ class PerfReader { + // file header, which may differ from the input file header, if any. 
+ struct perf_file_header out_header_; + ++ // Map for record event id to name string ++ std::map event_id_name_map_; ++ + DISALLOW_COPY_AND_ASSIGN(PerfReader); + }; + +diff --git a/third_party/perf_data_converter/src/quipper/perf_serializer.cc b/third_party/perf_data_converter/src/quipper/perf_serializer.cc +index 9653ab2..613189d 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_serializer.cc ++++ b/third_party/perf_data_converter/src/quipper/perf_serializer.cc +@@ -230,6 +230,11 @@ bool PerfSerializer::SerializeKernelEvent( + case PERF_RECORD_LOST_SAMPLES: + return SerializeLostSamplesEvent( + event, event_proto->mutable_lost_samples_event()); ++ case PERF_RECORD_SWITCH: ++ case PERF_RECORD_SWITCH_CPU_WIDE: ++ case PERF_RECORD_NAMESPACES: ++ LOG(WARNING) << "Not support for autofdo v0.19, event type:" << event.header.type; ++ break; + default: + LOG(ERROR) << "Unknown event type: " << event.header.type; + } +-- +2.25.1 + diff --git a/autofdo.spec b/autofdo.spec index a0ecbb7..1b8ef0c 100644 --- a/autofdo.spec +++ b/autofdo.spec @@ -1,12 +1,17 @@ Name: autofdo Version: 0.19 -Release: 0 +Release: 1 Summary: A tool to convert perf.data profile to AutoFDO profile License: Apache-2.0 URL: https://github.com/google/autofdo +# The package in the original url below contains GIT information which is useless, +# so remove the GIT information and repackage it. 
+# Source: https://github.com/google/%{name}/releases/download/%{version}/{version}.tar.gz Source: %{name}-%{version}.tar.xz +Patch0: 0001-Multi-event-processing-support.patch + BuildRequires: gcc gcc-c++ libtool autoconf automake git elfutils-libelf-devel openssl-devel pkg-config -Requires: glibc openssl-libs elfutils libgcc libstdc++ zlib +Requires: glibc openssl-libs elfutils libgcc libstdc++ zlib %description This package contains a tool to convert perf.data profile to AutoFDO @@ -36,7 +41,13 @@ make -j 1 %exclude %{_bindir}/create_llvm_prof %changelog -* Tue Nov 30 2021 liyancheng <412998149@qq.com> +* Sun Feb 13 2022 liyancheng <412998149@qq.com> - 0.19-1 +- Type:enhancement +- ID:NA +- SUG:NA +- DESC:Support parsing multiple events and process repeat function + +* Tue Nov 30 2021 liyancheng <412998149@qq.com> - 0.19-0 - Type:Init - ID:NA - SUG:NA -- Gitee From 1515efea7da79148f7456378ffab754b04715d1d Mon Sep 17 00:00:00 2001 From: liyancheng <412998149@qq.com> Date: Sun, 13 Feb 2022 15:08:16 +0800 Subject: [PATCH 2/2] [Arm SPE] Arm SPE event processing support Support parser Arm SPE events, each event will be saved in different profile file. 
(cherry picked from commit e0d48adbcaefe8186fa776d62c9818716b71c5f1) --- 0002-Arm-spe-parser-support.patch | 1049 +++++++++++++++++++++++++++++ autofdo.spec | 9 +- 2 files changed, 1057 insertions(+), 1 deletion(-) create mode 100644 0002-Arm-spe-parser-support.patch diff --git a/0002-Arm-spe-parser-support.patch b/0002-Arm-spe-parser-support.patch new file mode 100644 index 0000000..336e7fd --- /dev/null +++ b/0002-Arm-spe-parser-support.patch @@ -0,0 +1,1049 @@ +diff --git a/Makefile.am b/Makefile.am +index a6b8e0f..cf37121 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -88,6 +88,8 @@ $(am_create_llvm_prof_OBJECTS): $(protoc_outputs) + noinst_LIBRARIES = libquipper.a + libquipper_a_SOURCES = \ + third_party/perf_data_converter/src/quipper/address_mapper.cc \ ++ third_party/perf_data_converter/src/quipper/arm_spe_decoder.cc \ ++ third_party/perf_data_converter/src/quipper/arm_spe_parser.cc \ + third_party/perf_data_converter/src/quipper/binary_data_utils.cc \ + third_party/perf_data_converter/src/quipper/buffer_reader.cc \ + third_party/perf_data_converter/src/quipper/buffer_writer.cc \ +diff --git a/sample_reader.cc b/sample_reader.cc +index 43a03cb..ec21849 100644 +--- a/sample_reader.cc ++++ b/sample_reader.cc +@@ -216,18 +216,25 @@ bool PerfDataSampleReader::Append(const string &profile_file) { + // in the profile, then we use focus_binary to match samples. Otherwise, + // focus_binary_re_ is used to match the binary name with the samples. 
+ for (const auto &event : parser.parsed_events()) { +- if (!event.event_ptr || +- event.event_ptr->header().type() != PERF_RECORD_SAMPLE) { ++ if (!event.arm_spe_event.ArmSpeEventExist() && (!event.event_ptr || ++ event.event_ptr->header().type() != PERF_RECORD_SAMPLE)) { + continue; + } + if (MatchBinary(event.dso_and_offset.dso_name(), focus_binary)) { + address_count_map_[event.dso_and_offset.offset()]++; +- // pmu event should be processed here, if event_name is not empty, it means there is at least +- // two perf event in this perf.data, so we should record it into event_address_count_map_ for +- // multiply event support. +- auto &event_name = reader.GetEventNameFromId(event.event_ptr->sample_event().id()); +- if (!event_name.empty()) { +- event_address_count_map_[event_name][event.dso_and_offset.offset()]++; ++ // If this sample is an Arm SPE event, each target event will be decoded as a name string. ++ if (event.arm_spe_event.ArmSpeEventExist()) { ++ for (const auto &type_name : parser.GetArmSpeEventNameString(event.arm_spe_event.arm_spe_type_)) { ++ event_address_count_map_[type_name][event.dso_and_offset.offset()]++; ++ } ++ } else { ++ // pmu event should be processed here, if event_name is not empty, it means there is at least ++ // two perf event in this perf.data, so we should record it into event_address_count_map_ for ++ // multiply event support. ++ auto &event_name = reader.GetEventNameFromId(event.event_ptr->sample_event().id()); ++ if (!event_name.empty()) { ++ event_address_count_map_[event_name][event.dso_and_offset.offset()]++; ++ } + } + } + if (event.branch_stack.size() > 0 && +diff --git a/third_party/perf_data_converter/src/quipper/arm_spe_decoder.cc b/third_party/perf_data_converter/src/quipper/arm_spe_decoder.cc +new file mode 100644 +index 0000000..88f7c81 +--- /dev/null ++++ b/third_party/perf_data_converter/src/quipper/arm_spe_decoder.cc +@@ -0,0 +1,233 @@ ++// Copyright (c) 2021 The Chromium OS Authors. All rights reserved. 
++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++#include ++#include "base/logging.h" ++ ++#include "arm_spe_decoder.h" ++ ++namespace quipper ++{ ++ ++std::string ArmSpePacketDecoder::GetArmParserStateString(ArmParserState state) ++{ ++ if (state == ArmParserState::START) { ++ return "START"; ++ } else if (state == ArmParserState::PC) { ++ return "PC"; ++ } else if (state == ArmParserState::LAT) { ++ return "LAT"; ++ } else if (state == ArmParserState::TYPE) { ++ return "TYPE"; ++ } else if (state == ArmParserState::LD_ST) { ++ return "LD_ST"; ++ } else if (state == ArmParserState::BRANCH) { ++ return "BRANCH"; ++ } else if (state == ArmParserState::TGT) { ++ return "TGT"; ++ } else if (state == ArmParserState::PAD) { ++ return "PAD"; ++ } else if (state == ArmParserState::INST_OTHER) { ++ return "INST_OTHER"; ++ } else if (state == ArmParserState::END) { ++ return "END"; ++ } else if (state == ArmParserState::EXIT) { ++ return "EXIT"; ++ } else if (state == ArmParserState::UNKNOWN) { ++ return "UNKNOWN"; ++ } ++ return "UNKNOWN"; ++} ++ ++static std::string Dec2hex(int num) ++{ ++ std::stringstream ioss; ++ std::string tmpStr; ++ ioss << std::hex << std::setw(2) << std::setfill('0') << num; ++ ioss >> tmpStr; ++ return tmpStr; ++} ++ ++void ArmSpePacketDecoder::HandleError() ++{ ++ LOG(ERROR) << "ARM SPE: only the following Arm SPE flags are supported: jitter, branch_filter, load_filter, store_filter, event_filter, min_latency."; ++ LOG(ERROR) << "ARM SPE: recommend usage: perf record -e arm_spe_0/jitter=1/ -c COUNT -- COMMAND"; ++ LOG(ERROR) << "ARM SPE: unsupport arm_spe raw string! 
error_pos/total_size: " << m_curPos << "/" << m_totalSize; ++ LOG(ERROR) << "ARM SPE: current processing status code: " << GetArmParserStateString(m_curParserState); ++ std::string errorByteStr; ++ for (size_t i = m_curPos; i < m_curPos + 10 && i < m_totalSize; i++) { ++ errorByteStr.append(Dec2hex(int(m_rawTraceData[i]))); ++ errorByteStr.append(" "); ++ } ++ if (!errorByteStr.empty()) { ++ LOG(FATAL) << "ARM SPE: recent error 10 byte: " << errorByteStr; ++ } else { ++ LOG(FATAL) << "ARM SPE: there is no valid byte."; ++ } ++} ++ ++bool ArmSpePacketDecoder::Init(const std::string* traceData) ++{ ++ if (traceData == nullptr || traceData->empty()) { ++ LOG(ERROR) << "TraceData is invalid!"; ++ return false; ++ } ++ m_rawTraceData = traceData->c_str(); ++ m_totalSize = traceData->size(); ++ m_curParserState = ArmParserState::START; ++ m_curPos = 0; ++ return true; ++} ++ ++void ArmSpePacketDecoder::InitProcessArmSpePacketMap() ++{ ++ m_decodeArmSpePacketMap.insert({ArmParserState::START, &ArmSpePacketDecoder::ProcessInitSection}); ++ m_decodeArmSpePacketMap.insert({ArmParserState::PC, &ArmSpePacketDecoder::ProcessPCSection}); ++ m_decodeArmSpePacketMap.insert({ArmParserState::LAT, &ArmSpePacketDecoder::ProcessLATSection}); ++ m_decodeArmSpePacketMap.insert({ArmParserState::TYPE, &ArmSpePacketDecoder::ProcessTypeSection}); ++ m_decodeArmSpePacketMap.insert({ArmParserState::LD_ST, &ArmSpePacketDecoder::ProcessLDSTSection}); ++ m_decodeArmSpePacketMap.insert({ArmParserState::BRANCH, &ArmSpePacketDecoder::ProcessBranchTGTSection}); ++ m_decodeArmSpePacketMap.insert({ArmParserState::INST_OTHER, &ArmSpePacketDecoder::ProcessInstOtherSection}); ++ m_decodeArmSpePacketMap.insert({ArmParserState::PAD, &ArmSpePacketDecoder::ProcessPadSection}); ++ m_decodeArmSpePacketMap.insert({ArmParserState::END, &ArmSpePacketDecoder::ProcessEndSection}); ++} ++ ++bool ArmSpePacketDecoder::IsUnsolvedPacketExist() const ++{ ++ if (m_totalSize > 0 && m_curPos < m_totalSize) { ++ return true; 
++ } ++ return false; ++} ++ ++bool ArmSpePacketDecoder::ProcessInitSection(ArmSpeParsedPacket& packet) ++{ ++ if (static_cast<unsigned char>(m_rawTraceData[m_curPos]) == ArmSpeInstTag::PC) { // PC tag is 0xb0; plain char may be signed, so compare as unsigned ++ m_curPos++; ++ m_curParserState = ArmParserState::PC; ++ return true; ++ } else if (m_rawTraceData[m_curPos] == ArmSpeInstTag::PAD) { ++ m_curParserState = ArmParserState::PAD; ++ return true; ++ } ++ return false; ++} ++ ++bool ArmSpePacketDecoder::ProcessPCSection(ArmSpeParsedPacket& packet) ++{ ++ constexpr size_t pcLen = 7; ++ memcpy(&packet.sampleAddr, &(m_rawTraceData[m_curPos]), pcLen); ++ m_curPos += pcLen; ++ m_curPos++; // 1 = en_ls(1) ++ m_curParserState = ArmParserState::LAT; ++ return true; ++} ++ ++bool ArmSpePacketDecoder::ProcessLATSection(ArmSpeParsedPacket& packet) ++{ ++ /* LAT is useless for AutoFDO, ignore it. */ ++ m_curPos += 6; // 6 = LAT(6) ++ m_curParserState = ArmParserState::TYPE; ++ return true; ++} ++ ++bool ArmSpePacketDecoder::ProcessLDSTSection(ArmSpeParsedPacket& packet) ++{ ++ /* Virtual Address and LAT is useless for AutoFDO, ignore it.
*/ ++ m_curPos += 15; // 15 = VA(9) + LAT(6) ++ m_curParserState = ArmParserState::PAD; ++ packet.instType = m_curInstType = ArmSpeInstType::ARM_SPE_INST_LD_ST; // also publish the type in the packet, which is what callers read ++ return true; ++} ++ ++bool ArmSpePacketDecoder::ProcessBranchTGTSection(ArmSpeParsedPacket& packet) ++{ ++ if (m_rawTraceData[m_curPos] == ArmSpeBrTypeTag::B_COND) { ++ packet.instType = m_curInstType = ArmSpeInstType::ARM_SPE_INST_BR_COND; ++ } else if (m_rawTraceData[m_curPos] == ArmSpeBrTypeTag::B_IND) { ++ packet.instType = m_curInstType = ArmSpeInstType::ARM_SPE_INST_BR_IND; ++ } else if (m_rawTraceData[m_curPos] == ArmSpeBrTypeTag::B) { ++ packet.instType = m_curInstType = ArmSpeInstType::ARM_SPE_INST_BR; ++ } else { ++ LOG(ERROR) << "ARM SPE: Unsupport branch tag!"; ++ return false; ++ } ++ m_curPos += 2; // 2 = BRANCH_TYPE(1) + TGT_TAG(1) ++ constexpr size_t pcLen = 7; // 7 = PC(7) ++ memcpy(&packet.targetAddr, &(m_rawTraceData[m_curPos]), pcLen); ++ m_curPos += pcLen + 1; // 1 = el_ns(1) ++ m_curParserState = ArmParserState::PAD; ++ return true; ++} ++ ++bool ArmSpePacketDecoder::ProcessInstOtherSection(ArmSpeParsedPacket& packet) ++{ ++ packet.instType = m_curInstType = ArmSpeInstType::ARM_SPE_INST_OTHER; ++ m_curParserState = ArmParserState::PAD; ++ return true; ++} ++ ++bool ArmSpePacketDecoder::ProcessPadSection(ArmSpeParsedPacket& packet) ++{ ++ while (m_curPos < m_totalSize - 1 && m_rawTraceData[m_curPos] == ArmSpeInstTag::PAD) { // check the bound before reading the byte ++ m_curPos++; ++ } ++ m_curParserState = ArmParserState::END; ++ return true; ++} ++ ++bool ArmSpePacketDecoder::ProcessEndSection(ArmSpeParsedPacket& packet) ++{ ++ m_curParserState = ArmParserState::EXIT; ++ if (m_rawTraceData[m_curPos] == ArmSpeInstTag::PAD || m_rawTraceData[m_curPos] == ArmSpeInstTag::END) { ++ /* PAD tag means this is the end of Arm SPE section, there is no END tag. ++ END tag means that there are still packet s to be processed next.
*/ ++ m_curPos++; ++ return true; ++ } else if (static_cast<unsigned char>(m_rawTraceData[m_curPos]) == ArmSpeInstTag::PC) { // 0xb0 only matches when the byte is read as unsigned ++ /* sometimes packet not end with ArmSpeTag::END but with ++ ArmSpeTag::PC, so it should be processed next. */ ++ return true; ++ } ++ LOG(ERROR) << "ARM SPE: Invalid end section!"; ++ return false; ++} ++ ++bool ArmSpePacketDecoder::ProcessTypeSection(ArmSpeParsedPacket& packet) ++{ ++ if (m_rawTraceData[m_curPos++] != ArmSpeInstTag::TYPE) { ++ LOG(ERROR) << "ARM SPE: Unsupport sample type section tag!"; ++ return false; ++ } ++ ++ memcpy(&packet.packetType, &(m_rawTraceData[m_curPos]), sizeof(packet.packetType)); ++ m_curPos += sizeof(packet.packetType); ++ ++ if (m_rawTraceData[m_curPos] == ArmSpeInstTag::LD_ST) { ++ m_curParserState = ArmParserState::LD_ST; ++ m_curPos += 2; // 2 = FLAG(2) ++ } else if (m_rawTraceData[m_curPos] == ArmSpeInstTag::BRANCH) { ++ m_curParserState = ArmParserState::BRANCH; ++ m_curPos++; // 1 = BR_TAG(1) ++ } else if (m_rawTraceData[m_curPos] == ArmSpeInstTag::INST_OTHER) { ++ m_curParserState = ArmParserState::INST_OTHER; ++ m_curPos += 2; // 2 = FLAG(2) ++ } else { ++ LOG(ERROR) << "ARM SPE: Unsupport instruction type tag!"; ++ return false; ++ } ++ return true; ++} ++ ++bool ArmSpePacketDecoder::SolveOnePacket(ArmSpeParsedPacket& packet) ++{ ++ m_curParserState = ArmParserState::START; ++ while (m_curParserState != ArmParserState::EXIT) { ++ if (!(this->*m_decodeArmSpePacketMap[m_curParserState])(packet)) { ++ return false; ++ } ++ } ++ return true; ++} ++ ++} // namespace quipper +diff --git a/third_party/perf_data_converter/src/quipper/arm_spe_decoder.h b/third_party/perf_data_converter/src/quipper/arm_spe_decoder.h +new file mode 100644 +index 0000000..7600bd0 +--- /dev/null ++++ b/third_party/perf_data_converter/src/quipper/arm_spe_decoder.h +@@ -0,0 +1,171 @@ ++// Copyright (c) 2021 The Chromium OS Authors. All rights reserved.
++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++#ifndef PERF_DATA_CONVENTER_ARM_SPE_DECODER ++#define PERF_DATA_CONVENTER_ARM_SPE_DECODER ++ ++#include ++#include ++#include ++#include ++ ++#ifndef BIT ++#define BIT(n) (1UL << (n)) ++#endif ++ ++/* ++ Example of supporting Arm SPE profile packet: ++ ++ 1. load/str/other instruction ++ b0 68 ad d0 0b fc ff 00 80 PC 0xfffc0bd0ad68 el0 ns=1 ++ 99 0a 00 LAT 10 ISSUE ++ 98 c1 00 LAT 193 TOT ++ 52 1e 06 EV RETIRED L1D-ACCESS L1D-REFILL TLB-ACCESS LLC-REFILL REMOTE-ACCESS ++ 49 00 LD ++ b2 d8 d6 b6 0b fc ff 00 00 VA 0xfffc0bb6d6d8 ++ 9a 01 00 LAT 1 XLAT ++ 9e b6 00 LAT 182 ++ 00 00 00 00 PAD ++ 01 END ++ ++ 2. branch instruction ++ b0 20 a7 d0 0b fc ff 00 80 PC 0xfffc0bd0a720 el0 ns=1 ++ 99 66 00 LAT 102 ISSUE ++ 98 67 00 LAT 103 TOT ++ 52 82 00 EV RETIRED MISPRED ++ 4a 01 B COND ++ b1 a8 ad d0 0b fc ff 00 80 TGT 0xfffc0bd0ada8 el0 ns=1 ++ 00 00 00 00 00 00 PAD ++ 01 END ++*/ ++ ++namespace quipper { ++/* Use bit state to represent Arm SPE types. 
++ The bit status definition is the same as ArmSpeRawType */ ++using ArmSpeEventType = uint16_t; ++ ++/* DO NOT CHANGE IT, it`s define at kernel/tool/perf/.../arm_spe_decoder.h */ ++enum ArmSpeRawType { ++ EV_EXCEPTION_GEN = 0, // not used ++ EV_RETIRED = 1, ++ EV_L1D_ACCESS = 2, // not used ++ EV_L1D_REFILL = 3, ++ EV_TLB_ACCESS = 4, // not used ++ EV_TLB_REFILL = 5, // not used ++ EV_NOT_TAKEN = 6, // not used ++ EV_MISPRED = 7, // not used ++ EV_LLC_ACCESS = 8, // not used ++ EV_LLC_REFILL = 9, ++ EV_REMOTE_ACCESS = 10,// not used ++}; ++ ++/* Define instruction type of Arm SPE packet */ ++enum class ArmSpeInstType { ++ ARM_SPE_INST_OTHER, // other instruction ++ ARM_SPE_INST_LD_ST, // ld/str instruction ++ ARM_SPE_INST_BR, // branch instruction ++ ARM_SPE_INST_BR_COND, // branch condition ++ ARM_SPE_INST_BR_IND, // branch indirect call ++}; ++ ++inline bool IsArmSpeBranchInst(const ArmSpeInstType& type) ++{ ++ if (type == ArmSpeInstType::ARM_SPE_INST_BR || type == ArmSpeInstType::ARM_SPE_INST_BR_COND ++ || type == ArmSpeInstType::ARM_SPE_INST_BR_IND) { ++ return true; ++ } ++ return false; ++} ++ ++struct ArmSpeParsedPacket { ++ uint64_t sampleAddr = 0; // Address of sample instruction ++ uint64_t targetAddr = 0; // Branch instrution jump target address ++ ArmSpeEventType packetType = 0; // One packet may have multiply types, it represent as bit ++ ArmSpeInstType instType; // Instruction Type ++}; ++ ++class ArmSpePacketDecoder { ++public: ++ explicit ArmSpePacketDecoder() { ++ InitProcessArmSpePacketMap(); ++ } ++ ~ArmSpePacketDecoder() {} ++ ++ bool Init(const std::string* traceData); ++ ++ bool IsUnsolvedPacketExist() const; ++ ++ bool SolveOnePacket(ArmSpeParsedPacket& packet); ++ ++ void HandleError(); ++ ++private: ++ enum class ArmParserState { ++ START, ++ PC, ++ LAT, ++ TYPE, ++ LD_ST, ++ BRANCH, ++ TGT, ++ INST_OTHER, ++ PAD, ++ END, ++ EXIT, ++ UNKNOWN, ++ }; ++ ++ enum ArmSpeInstTag { ++ PC = 0xb0, ++ END = 0x01, ++ PAD = 0x00, ++ TYPE = 0x52, ++ 
LD_ST = 0x49, ++ BRANCH = 0x4a, ++ TGT = 0xb1, ++ INST_OTHER = 0x48, ++ }; ++ ++ enum ArmSpeBrTypeTag { ++ B = 0x00, // branch instruction ++ B_COND = 0x01, // condition instruction ++ B_IND = 0x02, // indirect branch instruction ++ }; ++ ++ /* Init Table-driven map to parser a SPE packet. */ ++ void InitProcessArmSpePacketMap(); ++ ++ bool ProcessInitSection(ArmSpeParsedPacket& packet); ++ ++ bool ProcessPCSection(ArmSpeParsedPacket& packet); ++ ++ bool ProcessLATSection(ArmSpeParsedPacket& packet); ++ ++ bool ProcessTypeSection(ArmSpeParsedPacket& packet); ++ ++ bool ProcessLDSTSection(ArmSpeParsedPacket& packet); ++ ++ bool ProcessBranchTGTSection(ArmSpeParsedPacket& packet); ++ ++ bool ProcessInstOtherSection(ArmSpeParsedPacket& packet); ++ ++ bool ProcessPadSection(ArmSpeParsedPacket& packet); ++ ++ bool ProcessEndSection(ArmSpeParsedPacket& packet); ++ ++ std::string GetArmParserStateString(ArmParserState); ++ ++ const char* m_rawTraceData = nullptr; ++ size_t m_totalSize = 0; ++ size_t m_curPos = 0; ++ enum ArmParserState m_curParserState; ++ ArmSpeInstType m_curInstType; ++ ++ /* Table-Driven approach to parser a SPE packet. */ ++ std::map m_decodeArmSpePacketMap; ++ ++}; // class ArmSpePacketDecoder ++ ++} // namespace quipper ++ ++#endif //PERF_DATA_CONVENTER_ARM_SPE_DECODER +diff --git a/third_party/perf_data_converter/src/quipper/arm_spe_parser.cc b/third_party/perf_data_converter/src/quipper/arm_spe_parser.cc +new file mode 100644 +index 0000000..123502d +--- /dev/null ++++ b/third_party/perf_data_converter/src/quipper/arm_spe_parser.cc +@@ -0,0 +1,72 @@ ++// Copyright (c) 2021 The Chromium OS Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. 
++#include "base/logging.h" ++ ++#include "arm_spe_parser.h" ++ ++namespace quipper { ++inline bool ArmSpeParser::IsTargetEvent(const ArmSpeParsedPacket& rawPacket) const ++{ ++ if (rawPacket.packetType & BIT(ArmSpeRawType::EV_RETIRED) ++ || rawPacket.packetType & BIT(ArmSpeRawType::EV_LLC_REFILL) ++ || rawPacket.packetType & BIT(ArmSpeRawType::EV_L1D_REFILL)) { ++ return true; ++ } ++ return false; ++} ++ ++bool ArmSpeParser::ParserRawSpeData() ++{ ++ if (!m_armSpePacketDecoder.Init(m_rawTraceData)) { ++ m_armSpePacketDecoder.HandleError(); ++ return false; ++ } ++ while (m_armSpePacketDecoder.IsUnsolvedPacketExist()) { ++ if (ArmSpeParsedPacket packet; m_armSpePacketDecoder.SolveOnePacket(packet)) { ++ if (IsTargetEvent(packet)) { ++ SavePacket(packet); ++ } ++ } else { ++ m_armSpePacketDecoder.HandleError(); ++ return false; ++ } ++ } ++ return true; ++} ++ ++const std::vector ArmSpeParser::GetArmSpeEventNameString(ArmSpeEventType type) const ++{ ++ std::vector ret; ++ for (size_t i = 0; i < sizeof(ArmSpeEventType) * 8; i++) { ++ if (type & BIT(i)) { ++ switch (i) { ++ case ArmSpeRawType::EV_RETIRED: ++ ret.push_back("ARM_SPE_INST_RETIRED"); ++ break; ++ case ArmSpeRawType::EV_LLC_REFILL: ++ ret.push_back("ARM_SPE_LLC_MISS"); ++ break; ++ case ArmSpeRawType::EV_L1D_REFILL: ++ ret.push_back("ARM_SPE_L1D_MISS"); ++ break; ++ case ArmSpeRawType::EV_TLB_REFILL: ++ case ArmSpeRawType::EV_MISPRED: ++ case ArmSpeRawType::EV_REMOTE_ACCESS: ++ case ArmSpeRawType::EV_NOT_TAKEN: ++ case ArmSpeRawType::EV_EXCEPTION_GEN: ++ case ArmSpeRawType::EV_L1D_ACCESS: ++ case ArmSpeRawType::EV_LLC_ACCESS: ++ case ArmSpeRawType::EV_TLB_ACCESS: ++ /* Useless for AutoFDO, just skip it. 
*/ ++ break; ++ default: ++ LOG(FATAL) << "ARM SPE: ArmSpeEventType not support this type: " ++ << static_cast(type); ++ } ++ } ++ } ++ return ret; ++} ++ ++} // namespace quipper +\ No newline at end of file +diff --git a/third_party/perf_data_converter/src/quipper/arm_spe_parser.h b/third_party/perf_data_converter/src/quipper/arm_spe_parser.h +new file mode 100644 +index 0000000..c515122 +--- /dev/null ++++ b/third_party/perf_data_converter/src/quipper/arm_spe_parser.h +@@ -0,0 +1,90 @@ ++// Copyright (c) 2021 The Chromium OS Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++#ifndef PERF_DATA_CONVENTER_ARM_SPE_PARSER ++#define PERF_DATA_CONVENTER_ARM_SPE_PARSER ++ ++#include ++#include ++ ++#include "arm_spe_decoder.h" ++ ++namespace quipper { ++/* ++ This class is used to parser Arm SPE events, ++ usage as flollows: SetTraceDataPtr() -> ParserRawSpeData() -> GetParsedSpeEvent() ++*/ ++ ++class ArmSpeParser { ++public: ++ explicit ArmSpeParser() {} ++ ~ArmSpeParser() {} ++ ++ /* Parsing raw Arm SPE byte stream, return the result. */ ++ bool ParsingArmSpeData(const std::string* traceStrPtr) { ++ if (!SetTraceDatePtr(traceStrPtr)) { ++ return false; ++ } ++ return ParserRawSpeData(); ++ } ++ ++ /* Return a ref of parsed SPE packets. */ ++ const std::list& GetParsedSpeEvent() const { ++ return m_parsedSpeEvents; ++ } ++ ++ /* ArmSpeEventType is a highly compressed format, we need decode it into strings when use. */ ++ const std::vector GetArmSpeEventNameString(ArmSpeEventType) const; ++ ++ /* Free saving parsedSpeEvents. */ ++ void Clear() { ++ m_parsedSpeEvents.clear(); ++ } ++ ++ bool ArmSpeTraceDataExist() const { ++ return m_armSpeTraceDataExist; ++ } ++ ++ void SetArmSpeTraceDataExist(bool exist) { ++ m_armSpeTraceDataExist = exist; ++ } ++ ++private: ++ /* traceDataPtr is a pointer to Arm SPE data string, it must be called firstly. 
++ DO NOT FREE traceStrPtr here, it`s belong to other modules. */ ++ bool SetTraceDatePtr(const std::string* traceStrPtr) { ++ if (traceStrPtr == nullptr || traceStrPtr->empty()) { ++ return false; ++ } ++ m_rawTraceData = traceStrPtr; ++ ++ return true; ++ } ++ ++ /* Parsing raw Arm SPE data, the target event will be saved in m_parsedSpeEvents. */ ++ bool ParserRawSpeData(); ++ ++ /* Return true if there is any target event we want. */ ++ bool IsTargetEvent(const ArmSpeParsedPacket&) const; ++ ++ inline void SavePacket(const ArmSpeParsedPacket& packet) { ++ m_parsedSpeEvents.push_back(packet); ++ } ++ ++ /* Record whether Arm SPE sample exists from PERF_RECORD_AUXTRACE_INFO. */ ++ bool m_armSpeTraceDataExist = false; ++ ++ /* Pointer to raw Arm SPE data, DO NOT FREE in this class. */ ++ const std::string* m_rawTraceData = nullptr; ++ ++ /* Save parsed Arm Spe event. */ ++ std::list m_parsedSpeEvents; ++ ++ /* Decoder is used for parser raw Arm SPE packet. */ ++ ArmSpePacketDecoder m_armSpePacketDecoder; ++ ++}; // class ArmSpeParser ++ ++} // namespace quipper ++ ++#endif // define PERF_DATA_CONVENTER_ARM_SPE_PARSER +\ No newline at end of file +diff --git a/third_party/perf_data_converter/src/quipper/kernel/perf_internals.h b/third_party/perf_data_converter/src/quipper/kernel/perf_internals.h +index 2225696..6bf6f15 100644 +--- a/third_party/perf_data_converter/src/quipper/kernel/perf_internals.h ++++ b/third_party/perf_data_converter/src/quipper/kernel/perf_internals.h +@@ -317,6 +317,16 @@ enum perf_user_event_type { + PERF_RECORD_HEADER_MAX = 81, + }; + ++// Taken from tools/perf/util/auxtrace.h ++enum auxtrace_type { ++ PERF_AUXTRACE_UNKNOWN, ++ PERF_AUXTRACE_INTEL_PT, ++ PERF_AUXTRACE_INTEL_BTS, ++ PERF_AUXTRACE_CS_ETM, ++ PERF_AUXTRACE_ARM_SPE, ++ PERF_AUXTRACE_S390_CPUMSF, ++}; ++ + struct attr_event { + struct perf_event_header header; + struct perf_event_attr attr; +@@ -342,6 +352,13 @@ struct tracing_data_event { + u32 size; + }; + ++struct 
auxtrace_info_event { ++ struct perf_event_header header; ++ u32 type; ++ u32 reserved__; /* For alignment */ ++ u64 priv[]; ++}; ++ + struct auxtrace_event { + struct perf_event_header header; + u64 size; +@@ -386,6 +403,7 @@ union perf_event { + struct event_type_event event_type; + struct tracing_data_event tracing_data; + struct build_id_event build_id; ++ struct auxtrace_info_event auxtrace_info; + struct auxtrace_event auxtrace; + struct aux_event aux; + struct itrace_start_event itrace_start; +diff --git a/third_party/perf_data_converter/src/quipper/perf_data.proto b/third_party/perf_data_converter/src/quipper/perf_data.proto +index faf0148..ebe72f1 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_data.proto ++++ b/third_party/perf_data_converter/src/quipper/perf_data.proto +@@ -463,6 +463,33 @@ message PerfDataProto { + optional SampleInfo sample_info = 6; + } + ++ // Next tag: 3 ++ message AuxtraceInfoEvent { ++ // Auxtrace type from the auxtrace_type enum in tools/perf/util/auxtrace.h. ++ optional uint32 type = 1; ++ ++ // Private data. ++ // WARNING: unparsed_binary_blob_priv_data contains unparsed private data ++ // specific to the type stored in the above field. This data is included to ++ // support serialization of a perf.data to perf_data.proto and ++ // deserialization of a perf_data.proto to perf.data. If this data is used ++ // for something other than the aforementioned usecase, this data has to be ++ // parsed based on the type. ++ // For example: ++ // If type == PERF_AUXTRACE_INTEL_PT, unparsed_binary_blob_priv_data ++ // contains fields filled by intel_pt_info_fill() function in the file ++ // tools/perf/arch/x86/util/intel-pt.c. ++ // If type == PERF_AUXTRACE_INTEL_BTS, unparsed_binary_blob_priv_data ++ // contains fields filled by intel_bts_info_fill() function in the file ++ // tools/perf/arch/x86/util/intel-bts.c. ++ // ++ // NOTE: Do not read this unparsed data directly. 
Quipper should be ++ // modified to parse the data into a new field before reading. Please ++ // contact developers of quipper to add support for parsing this data. ++ // ++ repeated uint64 unparsed_binary_blob_priv_data = 2; ++ } ++ + // Next tag: 8 + message AuxtraceEvent { + // Size of AUX area tracing buffer. +@@ -537,6 +564,7 @@ message PerfDataProto { + AuxEvent aux_event = 11; + ItraceStartEvent itrace_start_event = 13; + LostSamplesEvent lost_samples_event = 14; ++ AuxtraceInfoEvent auxtrace_info_event = 18; + AuxtraceEvent auxtrace_event = 12; + } + // Time after boot in nanoseconds corresponding to the event. +diff --git a/third_party/perf_data_converter/src/quipper/perf_parser.cc b/third_party/perf_data_converter/src/quipper/perf_parser.cc +index 6755e5c..8f9479c 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_parser.cc ++++ b/third_party/perf_data_converter/src/quipper/perf_parser.cc +@@ -125,10 +125,77 @@ bool PerfParser::ParseRawEvents() { + return true; + } + ++const std::vector PerfParser::GetArmSpeEventNameString(ArmSpeEventType type) { ++ return spe_parser_.GetArmSpeEventNameString(type); ++} ++ ++bool PerfParser::MapArmSpeEvent(ParsedEvent& event, const ArmSpeParsedPacket& packet, const uint64_t pid) { ++ // Currently, We only use PID from process_map. 
++ // Actually TID is not used by MapIPAndPidAndGetNameAndOffset ++ PidTid pidtid = std::make_pair(pid, pid); ++ uint64_t remapped_event_ip = 0; ++ if (!MapIPAndPidAndGetNameAndOffset(packet.sampleAddr, pidtid, ++ &remapped_event_ip, &event.dso_and_offset)) { ++ return false; ++ } ++ if (IsArmSpeBranchInst(packet.instType)) { ++ if (!MapIPAndPidAndGetNameAndOffset(packet.targetAddr, pidtid, ++ &remapped_event_ip, &event.arm_spe_event.arm_spe_br_tgt_)) { ++ return false; ++ } ++ event.arm_spe_event.arm_spe_inst_type_ = packet.instType; ++ } ++ event.arm_spe_event.arm_spe_type_ = packet.packetType; ++ return true; ++} ++ ++void PerfParser::ProcessArmSpeEvent() { ++ stats_.num_sample_events += spe_parser_.GetParsedSpeEvent().size(); ++ for (const auto& spe_event : spe_parser_.GetParsedSpeEvent()) { ++ for (const auto& process_map : process_mappers_) { ++ if (ParsedEvent event; MapArmSpeEvent(event, spe_event, process_map.first)) { ++ parsed_events_.push_back(event); ++ stats_.num_arm_spe_event_mapped++; ++ break; ++ } ++ } ++ } ++ stats_.num_sample_events_mapped += stats_.num_arm_spe_event_mapped; ++ spe_parser_.Clear(); ++} ++ + bool PerfParser::ProcessUserEvents(PerfEvent& event) { + // New user events from PERF-4.13 is not yet supported + switch (event.header().type()) { + case PERF_RECORD_AUXTRACE: ++ // This part may be called many times during processing, every parsed event ++ // will store in spe_parser_.GetParsedSpeEvent() ++ if (spe_parser_.ArmSpeTraceDataExist() && ++ spe_parser_.ParsingArmSpeData(&event.auxtrace_event().trace_data())) { ++ stats_.num_arm_spe_events = spe_parser_.GetParsedSpeEvent().size(); ++ } ++ break; ++ case PERF_RECORD_AUXTRACE_INFO: ++ switch (event.auxtrace_info_event().type()) ++ { ++ case PERF_AUXTRACE_ARM_SPE: ++ spe_parser_.SetArmSpeTraceDataExist(true); ++ break; ++ case PERF_AUXTRACE_UNKNOWN: ++ case PERF_AUXTRACE_INTEL_PT: ++ case PERF_AUXTRACE_INTEL_BTS: ++ case PERF_AUXTRACE_CS_ETM: ++ case PERF_AUXTRACE_S390_CPUMSF: ++ 
default: ++ VLOG(1) << "Unsupported PERF_RECORD_AUXTRACE_INFO: " << event.auxtrace_info_event().type(); ++ break; ++ } ++ case PERF_RECORD_AUXTRACE_ERROR: ++ case PERF_RECORD_THREAD_MAP: ++ case PERF_RECORD_STAT_CONFIG: ++ case PERF_RECORD_STAT: ++ case PERF_RECORD_STAT_ROUND: ++ case PERF_RECORD_TIME_CONV: + VLOG(1) << "Parsed event type: " << event.header().type() + << ". Doing nothing."; + break; +@@ -251,6 +318,10 @@ bool PerfParser::ProcessEvents() { + } + if (!FillInDsoBuildIds()) return false; + ++ if (!spe_parser_.GetParsedSpeEvent().empty()) { ++ ProcessArmSpeEvent(); ++ } ++ + // Print stats collected from parsing. + // clang-format off + LOG(INFO) << "Parser processed: " +@@ -259,7 +330,9 @@ bool PerfParser::ProcessEvents() { + << stats_.num_fork_events << " FORK events, " + << stats_.num_exit_events << " EXIT events, " + << stats_.num_sample_events << " SAMPLE events, " +- << stats_.num_sample_events_mapped << " of these were mapped"; ++ << stats_.num_sample_events_mapped << " of these were mapped, " ++ << stats_.num_arm_spe_events << " ARM_SPE events, " ++ << stats_.num_arm_spe_event_mapped << " of these ARM_SPE events were mapped."; + // clang-format on + + float sample_mapping_percentage = +@@ -269,7 +342,8 @@ bool PerfParser::ProcessEvents() { + if (sample_mapping_percentage < threshold) { + LOG(ERROR) << "Mapped " << static_cast(sample_mapping_percentage) + << "% of samples, expected at least " +- << static_cast(threshold) << "%"; ++ << static_cast(threshold) << "%," ++ << " adjust with option --sample_mapping_percentage_threshold"; + return false; + } + stats_.did_remap = options_.do_remap; +diff --git a/third_party/perf_data_converter/src/quipper/perf_parser.h b/third_party/perf_data_converter/src/quipper/perf_parser.h +index 2ec734e..37f7e34 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_parser.h ++++ b/third_party/perf_data_converter/src/quipper/perf_parser.h +@@ -22,6 +22,7 @@ + #include "compat/string.h" + #include "dso.h" + 
#include "perf_reader.h" ++#include "arm_spe_parser.h" + + namespace quipper { + +@@ -108,6 +109,43 @@ struct ParsedEvent { + std::equal(branch_stack.begin(), branch_stack.end(), + other.branch_stack.begin()); + } ++ ++ // A struct that contains Arm Spe event info, ++ // including event type, instruction type and branch ++ // target address. ++ struct ArmSpeEvent { ++ ArmSpeEventType arm_spe_type_ = 0; ++ ArmSpeInstType arm_spe_inst_type_; ++ DSOAndOffset arm_spe_br_tgt_; ++ ++ bool ArmSpeEventExist() const { ++ for (size_t i = 0; i < sizeof(ArmSpeEventType) * 8; i++) { ++ if (arm_spe_type_ & BIT(i)) { ++ switch (i) { ++ case ArmSpeRawType::EV_RETIRED: ++ case ArmSpeRawType::EV_LLC_REFILL: ++ case ArmSpeRawType::EV_L1D_REFILL: ++ /* Useful for AutoFDO */ ++ return true; ++ case ArmSpeRawType::EV_MISPRED: ++ case ArmSpeRawType::EV_REMOTE_ACCESS: ++ case ArmSpeRawType::EV_TLB_REFILL: ++ case ArmSpeRawType::EV_NOT_TAKEN: ++ case ArmSpeRawType::EV_EXCEPTION_GEN: ++ case ArmSpeRawType::EV_L1D_ACCESS: ++ case ArmSpeRawType::EV_LLC_ACCESS: ++ case ArmSpeRawType::EV_TLB_ACCESS: ++ /* Useless for AutoFDO, just skip it. */ ++ break; ++ default: ++ LOG(FATAL) << "ARM SPE: ArmSpeEventType not support this type: " ++ << static_cast(arm_spe_type_); ++ } ++ } ++ } ++ return false; ++ } ++ } arm_spe_event; + }; + + struct PerfEventStats { +@@ -117,12 +155,14 @@ struct PerfEventStats { + uint32_t num_comm_events; + uint32_t num_fork_events; + uint32_t num_exit_events; ++ uint32_t num_arm_spe_events; + + // Number of sample events that were successfully mapped using the address + // mapper. The mapping is recorded regardless of whether the address in the + // perf sample event itself was assigned the remapped address. The latter is + // indicated by |did_remap|. + uint32_t num_sample_events_mapped; ++ uint32_t num_arm_spe_event_mapped; + + // Whether address remapping was enabled during event parsing. + bool did_remap; +@@ -191,6 +231,9 @@ class PerfParser { + // Use with caution. 
Deserialization uses this to restore stats from proto. + PerfEventStats* mutable_stats() { return &stats_; } + ++ // ArmSpeEventType is a highly compressed format, we need decode it into strings when use. ++ const std::vector GetArmSpeEventNameString(ArmSpeEventType type); ++ + private: + // Used for processing events. e.g. remapping with synthetic addresses. + bool ProcessEvents(); +@@ -252,6 +295,12 @@ class PerfParser { + std::pair GetOrCreateProcessMapper( + uint32_t pid, uint32_t ppid = kKernelPid); + ++ // Map every raw ARM SPE event, save them into parsed_events_. ++ void ProcessArmSpeEvent(); ++ ++ // Map an ARM SPE event by PC(from sample) and pid. ++ bool MapArmSpeEvent(ParsedEvent& event, const ArmSpeParsedPacket& packet, const uint64_t pid); ++ + // Points to a PerfReader that contains the input perf data to parse. + PerfReader* const reader_; + +@@ -277,6 +326,9 @@ class PerfParser { + // Maps process ID to an address mapper for that process. + std::unordered_map> process_mappers_; + ++ // A member to process Arm SPE event ++ ArmSpeParser spe_parser_; ++ + DISALLOW_COPY_AND_ASSIGN(PerfParser); + }; + +diff --git a/third_party/perf_data_converter/src/quipper/perf_reader.cc b/third_party/perf_data_converter/src/quipper/perf_reader.cc +index 3012cd9..2815b45 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_reader.cc ++++ b/third_party/perf_data_converter/src/quipper/perf_reader.cc +@@ -1916,6 +1916,19 @@ void PerfReader::MaybeSwapEventFields(event_t* event, bool is_cross_endian) { + case PERF_RECORD_LOST_SAMPLES: + ByteSwap(&event->lost_samples.lost); + break; ++ case PERF_RECORD_AUXTRACE_INFO: { ++ ByteSwap(&event->auxtrace_info.type); ++ u64 priv_size = ++ (event->header.size - ++ (sizeof(event->header) + sizeof(event->auxtrace_info.type) + ++ sizeof(u32) // size of auxtrace_info_event.reserved__ ++ )) / ++ sizeof(u64); ++ for (u64 i = 0; i < priv_size; ++i) { ++ ByteSwap(&event->auxtrace_info.priv[i]); ++ } ++ break; ++ } + case 
PERF_RECORD_AUXTRACE: + ByteSwap(&event->auxtrace.size); + ByteSwap(&event->auxtrace.offset); +diff --git a/third_party/perf_data_converter/src/quipper/perf_serializer.cc b/third_party/perf_data_converter/src/quipper/perf_serializer.cc +index 613189d..2386f4d 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_serializer.cc ++++ b/third_party/perf_data_converter/src/quipper/perf_serializer.cc +@@ -244,6 +244,9 @@ bool PerfSerializer::SerializeKernelEvent( + bool PerfSerializer::SerializeUserEvent( + const event_t& event, PerfDataProto_PerfEvent* event_proto) const { + switch (event.header.type) { ++ case PERF_RECORD_AUXTRACE_INFO: ++ return SerializeAuxtraceInfoEvent( ++ event, event_proto->mutable_auxtrace_info_event()); + case PERF_RECORD_AUXTRACE: + return SerializeAuxtraceEvent(event, + event_proto->mutable_auxtrace_event()); +@@ -329,6 +332,9 @@ bool PerfSerializer::DeserializeKernelEvent( + bool PerfSerializer::DeserializeUserEvent( + const PerfDataProto_PerfEvent& event_proto, event_t* event) const { + switch (event_proto.header().type()) { ++ case PERF_RECORD_AUXTRACE_INFO: ++ return DeserializeAuxtraceInfoEvent(event_proto.auxtrace_info_event(), ++ event); + case PERF_RECORD_AUXTRACE: + return DeserializeAuxtraceEvent(event_proto.auxtrace_event(), event); + default: +@@ -882,6 +888,35 @@ bool PerfSerializer::DeserializeBuildIDEvent( + return true; + } + ++bool PerfSerializer::SerializeAuxtraceInfoEvent( ++ const event_t& event, PerfDataProto_AuxtraceInfoEvent* sample) const { ++ const struct auxtrace_info_event& auxtrace_info = event.auxtrace_info; ++ u64 priv_size = ++ (event.header.size - sizeof(struct auxtrace_info_event)) / sizeof(u64); ++ sample->set_type(auxtrace_info.type); ++ if (auxtrace_info.reserved__ != 0) { ++ LOG(WARNING) << "PERF_RECORD_AUXTRACE_INFO's auxtrace_info_event.reserved__" ++ " contains a non-zero value: " ++ << auxtrace_info.reserved__ ++ << ". 
This" ++ " record's format has changed."; ++ } ++ for (u64 i = 0; i < priv_size; ++i) { ++ sample->add_unparsed_binary_blob_priv_data(auxtrace_info.priv[i]); ++ } ++ return true; ++} ++ ++bool PerfSerializer::DeserializeAuxtraceInfoEvent( ++ const PerfDataProto_AuxtraceInfoEvent& sample, event_t* event) const { ++ struct auxtrace_info_event& auxtrace_info = event->auxtrace_info; ++ auxtrace_info.type = sample.type(); ++ for (u64 i = 0; i < sample.unparsed_binary_blob_priv_data_size(); ++i) { ++ auxtrace_info.priv[i] = sample.unparsed_binary_blob_priv_data(i); ++ } ++ return true; ++} ++ + bool PerfSerializer::SerializeAuxtraceEvent( + const event_t& event, PerfDataProto_AuxtraceEvent* sample) const { + const struct auxtrace_event& auxtrace = event.auxtrace; +diff --git a/third_party/perf_data_converter/src/quipper/perf_serializer.h b/third_party/perf_data_converter/src/quipper/perf_serializer.h +index 606335d..dbdeb23 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_serializer.h ++++ b/third_party/perf_data_converter/src/quipper/perf_serializer.h +@@ -143,6 +143,10 @@ class PerfSerializer { + bool DeserializeBuildIDEvent(const PerfDataProto_PerfBuildID& from, + malloced_unique_ptr* to) const; + ++ bool SerializeAuxtraceInfoEvent( ++ const event_t& event, PerfDataProto_AuxtraceInfoEvent* sample) const; ++ bool DeserializeAuxtraceInfoEvent( ++ const PerfDataProto_AuxtraceInfoEvent& sample, event_t* event) const; + bool SerializeAuxtraceEvent(const event_t& event, + PerfDataProto_AuxtraceEvent* sample) const; + bool SerializeAuxtraceEventTraceData(const std::vector& from, diff --git a/autofdo.spec b/autofdo.spec index 1b8ef0c..648e813 100644 --- a/autofdo.spec +++ b/autofdo.spec @@ -1,6 +1,6 @@ Name: autofdo Version: 0.19 -Release: 1 +Release: 2 Summary: A tool to convert perf.data profile to AutoFDO profile License: Apache-2.0 URL: https://github.com/google/autofdo @@ -9,6 +9,7 @@ URL: https://github.com/google/autofdo # Source: 
https://github.com/google/%{name}/releases/download/%{version}/{version}.tar.gz Source: %{name}-%{version}.tar.xz Patch0: 0001-Multi-event-processing-support.patch +Patch1: 0002-Arm-spe-parser-support.patch BuildRequires: gcc gcc-c++ libtool autoconf automake git elfutils-libelf-devel openssl-devel pkg-config Requires: glibc openssl-libs elfutils libgcc libstdc++ zlib @@ -41,6 +42,12 @@ make -j 1 %exclude %{_bindir}/create_llvm_prof %changelog +* Sun Feb 13 2022 liyancheng <412998149@qq.com> - 0.19-2 +- Type:enhancement +- ID:NA +- SUG:NA +- DESC:Support processing ARM SPE event + * Sun Feb 13 2022 liyancheng <412998149@qq.com> - 0.19-1 - Type:enhancement - ID:NA -- Gitee