From e736f2c41619c4f50f072366e537822c3b9a76d7 Mon Sep 17 00:00:00 2001 From: echo <2220386943@qq.com> Date: Thu, 18 Apr 2024 20:46:49 +0800 Subject: [PATCH] optimize dwarf and elf analyze --- symbol/symbol_resolve.cpp | 294 ++++++++++++++++++++++++-------------- symbol/symbol_resolve.h | 113 ++++++++++----- 2 files changed, 263 insertions(+), 144 deletions(-) diff --git a/symbol/symbol_resolve.cpp b/symbol/symbol_resolve.cpp index 3a1b22d..9cb0089 100644 --- a/symbol/symbol_resolve.cpp +++ b/symbol/symbol_resolve.cpp @@ -20,8 +20,6 @@ #include #include #include -#include -#include #include #include "name_resolve.h" #include "pcerr.h" @@ -47,6 +45,7 @@ constexpr int KERNEL_MODULE_LNE = 128; constexpr int CODE_LINE_RANGE_LEN = 10; constexpr int HEX_LEN = 16; constexpr int TO_TAIL_LEN = 2; +constexpr unsigned long USER_MAX_ADDR = 0xffffffff; const std::string HUGEPAGE = "/anon_hugepage"; const std::string DEV_ZERO = "/dev/zero"; @@ -114,35 +113,14 @@ namespace { } } - static inline void DwarfInfoRecord(DWARF_DATA_MAP& dwarfTable, const dwarf::line_table& lt, - SymbolVet& dwarfFileArray) - { - for (const dwarf::line_table::entry& line : lt) { - if (line.end_sequence) { - continue; - } - std::string fileName = line.file->path(); - if (fileName.empty()) { - continue; - } - DwarfMap data = {0}; - data.lineNum = line.line; - data.fileIndex = dwarfFileArray.InsertKeyForIndex(fileName); - dwarfTable.insert({line.address, data}); - } - } - - static inline void ElfInfoRecord(std::vector& elfVector, const elf::section& sec) + static inline void ElfInfoRecord(MyElf& myElf, const elf::section& sec) { for (const auto& sym : sec.as_symtab()) { auto& data = sym.get_data(); - if (data.type() != elf::stt::func) + if (data.type() != elf::stt::func){ continue; - ElfMap elfData; - elfData.start = data.value; - elfData.end = data.value + data.size; - elfData.symbolName = sym.get_name(); - elfVector.emplace_back(elfData); + } + myElf.Emplace(data.value, sym); } } @@ -376,6 +354,146 @@ bool SymbolUtils::IsNumber(const std::string& str) return true; } +bool MyElf::IsExecFile() +{ + return elf.get_hdr().type == elf::et::exec; +} + +void MyElf::Emplace(unsigned long addr, const ELF_SYM& elfSym) +{ + this->symTab.insert({addr, elfSym}); +} + +ELF_SYM* MyElf::FindSymbol(unsigned long addr) +{ + if (symTab.empty()) { + return nullptr; + } + auto it = symTab.upper_bound(addr); + if(it == symTab.cbegin()) { + return nullptr; + } + --it; + if(addr > it->first + it->second.get_data().size) { + return nullptr; + } + return &it->second; +} + +void RangeL::FindLine(unsigned long addr, struct DwarfEntry& entry) +{ + if (!loadLineTable) { + lineTab = cu.get_line_table(); + loadLineTable = true; + } + auto rs = FindAddress(addr); + if (rs != lineTab.end()) { + entry.find = true; + entry.fileName = rs->file->path(); + entry.lineNum = rs->line; + } +} + +dwarf::line_table::iterator RangeL::FindAddress(unsigned long addr) +{ + auto prev = lineTab.begin(); + auto end = lineTab.end(); + if (prev == end) { + return prev; + } + auto it = prev; + for (++it; it != end; prev = it++) { + if (prev->address <= addr && it->address > addr && !prev->end_sequence) { + return prev; + } + } + if (it->address > addr && addr >= prev->address) { + return prev; + } + return end; +} + +void RangeL::ReadRange(const DWARF_CU& cu) +{ + this->cu = cu; + // it spends most time to get data, just load one time; + const dwarf::die &die = cu.root(); + if (die.has(dwarf::DW_AT::ranges) && die[dwarf::DW_AT::ranges].is_valid_rangelist()) { + for (auto &dwarfRange : dwarf::at_ranges(die)) { + if (dwarfRange.low >= dwarfRange.high) { + continue; + } + if (rangeMap.find(dwarfRange.low) != rangeMap.end()) { + continue; + } + rangeMap.insert({dwarfRange.low, dwarfRange.high}); + } + } + + if (rangeMap.size() > 0) { + return; + } + + // if the die does not contain ranges, get_line_table obtains + lineTab = cu.get_line_table(); + loadLineTable = true; + auto it = lineTab.begin(); + auto end = lineTab.end(); + unsigned long minAddr = it->address; + unsigned long maxAddr = it->address; + while (it != end) { + if (it->end_sequence) { + maxAddr = maxAddr < it->address ? it->address : maxAddr; + rangeMap.insert({minAddr, maxAddr}); + ++it; + minAddr = it->address; + maxAddr = it->address; + continue; + } + ++it; + } + maxAddr = maxAddr < it->address ? it->address : maxAddr; + if (minAddr < maxAddr) { + rangeMap.insert({minAddr, maxAddr}); + } +} + +void MyDwarf::FindLine(unsigned long addr, struct DwarfEntry &entry) +{ + for (auto &range : rangeList) { + if (range.IsInLineTable(addr)) { + range.FindLine(addr, entry); + return; + } + } +} + +void MyDwarf::LoadDwarf(unsigned long addr, DwarfEntry& entry) +{ + if (hasLoad) { + return; + } + if (cuList.empty()) { + for (const auto &cu : dw.compilation_units()) { + cuList.push_back(cu); + } + } + // Set hasLoad = true if it's loaded + while (loadNum < cuList.size()) + { + const auto &cu = cuList.at(loadNum); + RangeL range; + range.ReadRange(cu); + rangeList.push_back(range); + loadNum++; + if (range.IsInLineTable(addr)) { + range.FindLine(addr, entry); + return; + } + } + hasLoad = true; +} + int SymbolResolve::RecordModule(int pid, RecordModuleType recordModuleType) { if (pid < 0) { @@ -479,13 +597,6 @@ void SymbolResolve::FreeModule(int pid) return; } -struct SortElf { - inline bool operator()(const ElfMap& first, const ElfMap& second) - { - return (first.start < second.start); - } -}; - int SymbolResolve::RecordElf(const char* fileName) { SetFalse(this->isCleared); @@ -515,23 +626,22 @@ int SymbolResolve::RecordElf(const char* fileName) std::shared_ptr efLoader = elf::create_mmap_loader(fd); elf::elf ef(efLoader); - std::vector elfVector; + MyElf myElf(ef); try { for (const auto& sec : ef.sections()) { if (sec.get_hdr().type != elf::sht::symtab && sec.get_hdr().type != elf::sht::dynsym) { continue; } - ElfInfoRecord(elfVector, sec); + ElfInfoRecord(myElf, sec); } } catch (elf::format_error& error) { + close(fd); pcerr::New(LIBSYM_ERR_ELFIN_FOMAT_FAILED, "libsym record elf format error: " + std::string{error.what()}); elfSafeHandler.releaseLock(file); return LIBSYM_ERR_ELFIN_FOMAT_FAILED; } - - std::sort(elfVector.begin(), elfVector.end(), SortElf()); - this->elfMap.insert({file, elfVector}); + this->elfMap.emplace(file, myElf); close(fd); pcerr::New(0, "success"); elfSafeHandler.releaseLock(file); @@ -570,18 +680,10 @@ int SymbolResolve::RecordDwarf(const char* fileName) try { dwarf::dwarf dw(dwarf::elf::create_loader(ef)); - DWARF_DATA_MAP dwarfTable; - for (const auto& cu : dw.compilation_units()) { - const dwarf::line_table lt = cu.get_line_table(); - DwarfInfoRecord(dwarfTable, lt, dwarfFileArray); - } - std::vector addrVet; - for (auto it = dwarfTable.begin(); it != dwarfTable.end(); ++it) { - addrVet.push_back(it->first); - } - this->dwarfMap.insert({file, dwarfTable}); - this->dwarfVetMap.insert({file, addrVet}); + MyDwarf myDwarf(dw, file); + dwarfMap.emplace(file, myDwarf); } catch (dwarf::format_error& error) { + close(fd); dwarfSafeHandler.releaseLock((file)); pcerr::New(LIBSYM_ERR_DWARF_FORMAT_FAILED, "libsym record dwarf file named " + file + " format error: " + std::string{error.what()}); @@ -619,63 +721,40 @@ void SymbolResolve::Clear() this->instance = nullptr; } -void SymbolResolve::SearchElfInfo( - std::vector& elfVec, unsigned long addr, struct Symbol* symbol, unsigned long* offset) +void SymbolResolve::SearchElfInfo(MyElf& myElf, unsigned long addr, struct Symbol* symbol, unsigned long* offset) { - ssize_t start = 0; - ssize_t end = elfVec.size() - 1; - ssize_t mid; - unsigned long symAddr; - - while (start < end) { - mid = start + (end - start + 1) / BINARY_HALF; - symAddr = elfVec[mid].start; - if (symAddr <= addr) { - start = mid; - } else { - end = mid - 1; - } + ELF_SYM *elfSym = myElf.FindSymbol(addr); + if (elfSym == nullptr) { + strcpy(symbol->symbolName, "UNKNOWN"); + return; } - - if (start == end && elfVec[start].start <= addr && elfVec[start].end >= addr) { - *offset = addr - elfVec[start].start; - symbol->codeMapEndAddr = elfVec[start].end; - char* name = CppNamedDemangle(elfVec[start].symbolName.c_str()); - if (name) { - strcpy(symbol->symbolName, name); - free(name); - name = nullptr; - return; - } - strcpy(symbol->symbolName, elfVec[start].symbolName.c_str()); + symbol->codeMapEndAddr = elfSym->get_data().value + elfSym->get_data().size; + *offset = addr - elfSym->get_data().value; + std::string symName = elfSym->get_name(); + char *name = CppNamedDemangle(symName.c_str()); + if (name) { + strcpy(symbol->symbolName, name); + free(name); + name = nullptr; return; } - strcpy(symbol->symbolName, "UNKNOWN"); + strcpy(symbol->symbolName, symName.c_str()); return; } -void SymbolResolve::SearchDwarfInfo( - std::vector& addrVet, DWARF_DATA_MAP& dwarfDataMap, unsigned long addr, struct Symbol* symbol) +void SymbolResolve::SearchDwarfInfo(MyDwarf& myDwarf, unsigned long addr, struct Symbol* symbol) { - DwarfMap dwarfMap = {0}; - bool findLine = false; - if (dwarfDataMap.find(addr) != dwarfDataMap.end()) { - dwarfMap = dwarfDataMap.at(addr); - findLine = true; - } else { - auto it = std::upper_bound(addrVet.begin(), addrVet.end(), addr); - if (it > addrVet.begin() && it < addrVet.end()) { - --it; - } - if (it != addrVet.end()) { - dwarfMap = dwarfDataMap.at(*it); - findLine = true; - } - } - if (findLine) { - std::string fileName = dwarfFileArray.GetKeyByIndex(dwarfMap.fileIndex); - strcpy(symbol->fileName, fileName.c_str()); - symbol->lineNum = dwarfMap.lineNum; + DwarfEntry dwarfEntry; + const std::string moduleName = myDwarf.GetModule(); + dwarfLoadHandler.tryLock(moduleName); + myDwarf.FindLine(addr, dwarfEntry); + if(!dwarfEntry.find) { + myDwarf.LoadDwarf(addr, dwarfEntry); + } + dwarfLoadHandler.releaseLock(moduleName); + if (dwarfEntry.find) { + strcpy(symbol->fileName, dwarfEntry.fileName.c_str()); + symbol->lineNum = dwarfEntry.lineNum; } else { strcpy(symbol->fileName, "Uknown"); symbol->lineNum = 0; @@ -795,18 +874,16 @@ struct Symbol* SymbolResolve::MapUserAddr(int pid, unsigned long addr) if (this->elfMap.find(module->moduleName) != this->elfMap.end()) { // If the largest symbol in the elf symbol table is detected to be smaller than the searched symbol, subtraction // is performed. - std::vector elfVet = this->elfMap.at(module->moduleName); - if (!elfVet.empty()) { - if (elfVet.back().end < addrToSearch && addrToSearch > module->start) { - addrToSearch = addrToSearch - module->start; - } - this->SearchElfInfo(elfVet, addrToSearch, symbol, &symbol->offset); + MyElf& myElf = this->elfMap.at(module->moduleName); + // if the file is not exectable, subtraction is required + if(!myElf.IsExecFile()){ + addrToSearch = addrToSearch - module->start; } + this->SearchElfInfo(myElf, addrToSearch, symbol, &symbol->offset); } if (this->dwarfMap.find(module->moduleName) != this->dwarfMap.end()) { - this->SearchDwarfInfo( - this->dwarfVetMap.at(module->moduleName), this->dwarfMap.at(module->moduleName), addrToSearch, symbol); + this->SearchDwarfInfo(this->dwarfMap.at(module->moduleName), addrToSearch, symbol); } symbol->codeMapAddr = addrToSearch; this->symbolMap.at(pid).insert({addr, symbol}); @@ -952,8 +1029,7 @@ struct Symbol* SymbolResolve::MapCodeAddr(const char* moduleName, unsigned long } int ret = RecordDwarf(moduleName); if (ret == 0) { - this->SearchDwarfInfo( - this->dwarfVetMap.at(moduleName), this->dwarfMap.at(moduleName), symbol->codeMapAddr, symbol); + this->SearchDwarfInfo(this->dwarfMap.at(moduleName), symbol->codeMapAddr, symbol); } else { symbol->fileName = nullptr; } diff --git a/symbol/symbol_resolve.h b/symbol/symbol_resolve.h index e87d6ba..edb79e7 100644 --- a/symbol/symbol_resolve.h +++ b/symbol/symbol_resolve.h @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include #include "safe_handler.h" #include "linked_list.h" @@ -41,50 +43,94 @@ namespace KUNPENG_SYM { std::string symbolName; } __attribute__((aligned(8))); - struct DwarfMap { - unsigned int lineNum; - unsigned int fileIndex; - } __attribute__((aligned(8))); + struct DwarfEntry { + unsigned int lineNum = 0; + std::string fileName = {}; + bool find = false; + }; enum class RecordModuleType { RECORD_ALL = 0, RECORD_NO_DWARF = 1 }; - using SYMBOL_MAP = std::unordered_map>; - using STACK_MAP = std::unordered_map>; - using MODULE_MAP = std::unordered_map>>; - using DWARF_DATA_MAP = std::map; - using DWARF_MAP = std::unordered_map; - using DWARF_VET_MAP = std::unordered_map>; - using ELF_MAP = std::unordered_map>; + using ELF_SYM = elf::sym; + using ELF = elf::elf; + using DWARF = dwarf::dwarf; + using DWARF_TABEL = dwarf::line_table; + using DWARF_ENTRY = dwarf::line_table::entry; + using DWARF_CU = dwarf::compilation_unit; - template - class SymbolVet : public std::vector { + class RangeL { public: - unsigned int InsertKeyForIndex(const Key& key) + RangeL() = default; + + void FindLine(unsigned long addr, struct DwarfEntry &entry); + + void ReadRange(const DWARF_CU &cu); + + dwarf::line_table::iterator FindAddress(unsigned long addr); + + bool IsInLineTable(unsigned long addr) const { - std::lock_guard guard(keyMutex); - if (keyMap.find(key) != keyMap.end()) { - return keyMap.at(key); + auto it = rangeMap.upper_bound(addr); + if (it == rangeMap.cbegin()) { + return false; } - this->push_back(key); - keyMap[key] = this->size() - 1; - return this->size() - 1; + it--; + const unsigned long *highAddr = &it->second; + return addr <= *highAddr; + } + + private: + std::map rangeMap; + bool loadLineTable = false; + DWARF_TABEL lineTab; + DWARF_CU cu; + }; + + class MyElf + { + public: + explicit MyElf(const ELF &elf) : elf(elf){}; + ELF_SYM *FindSymbol(unsigned long addr); + void Emplace(unsigned long addr, const ELF_SYM &elfSym); + bool IsExecFile(); + + private: + ELF elf; + std::map symTab; + }; + + class MyDwarf + { + public: + MyDwarf(const DWARF &dw, const std::string &moduleName) : dw(dw), moduleName(moduleName){}; + void FindLine(unsigned long addr, struct DwarfEntry &dwarfEntry); + void LoadDwarf(unsigned long addr, struct DwarfEntry &dwarfEntry); + + bool IsLoad() const + { + return hasLoad; } - Key& GetKeyByIndex(const unsigned int index) + std::string GetModule() const { - std::lock_guard guard(keyMutex); - if (index < this->size()) { - return this->at(index); - } - Key key = {}; - return key; + return this->moduleName; } private: - std::unordered_map keyMap; - std::mutex keyMutex; + DWARF dw; + std::string moduleName; + volatile bool hasLoad = false; + volatile int loadNum = 0; + std::vector rangeList; + std::vector cuList; }; + using SYMBOL_MAP = std::unordered_map>; + using STACK_MAP = std::unordered_map>; + using MODULE_MAP = std::unordered_map>>; + using DWARF_MAP = std::unordered_map; + using ELF_MAP = std::unordered_map; + class SymbolUtils final { public: SymbolUtils() = default; @@ -126,10 +172,8 @@ namespace KUNPENG_SYM { struct Symbol* MapCodeAddr(const char* moduleName, unsigned long startAddr); private: - void SearchElfInfo( - std::vector& elfVec, unsigned long addr, struct Symbol* symbol, unsigned long* offset); - void SearchDwarfInfo( - std::vector& addrVet, DWARF_DATA_MAP& dwalfVec, unsigned long addr, struct Symbol* symbol); + void SearchElfInfo(MyElf &myElf, unsigned long addr, struct Symbol *symbol, unsigned long *offset); + void SearchDwarfInfo(MyDwarf &myDwarf, unsigned long addr, struct Symbol *symbol); struct Symbol* MapKernelAddr(unsigned long addr); struct Symbol* MapUserAddr(int pid, unsigned long addr); struct Symbol* MapUserCodeAddr(const std::string& moduleName, unsigned long addr); @@ -142,11 +186,9 @@ namespace KUNPENG_SYM { STACK_MAP stackMap{}; MODULE_MAP moduleMap{}; DWARF_MAP dwarfMap{}; - DWARF_VET_MAP dwarfVetMap{}; ELF_MAP elfMap{}; bool isCleared = false; std::vector> ksymArray; - SymbolVet dwarfFileArray; SymbolResolve() {} @@ -159,6 +201,7 @@ namespace KUNPENG_SYM { SafeHandler dwarfSafeHandler; SafeHandler elfSafeHandler; SafeHandler symSafeHandler; + SafeHandler dwarfLoadHandler; static std::mutex kernelMutex; static SymbolResolve* instance; static std::mutex mutex; -- Gitee