diff --git a/llvm-build/func_structure_demo.py b/llvm-build/func_structure_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0686c3c6951b2e7112106a9297523f6190b82a3
--- /dev/null
+++ b/llvm-build/func_structure_demo.py
@@ -0,0 +1,323 @@
+"""Render the call graph of a disassembly log with HWASan frames highlighted.
+
+Reads the report ``hwasan-sp``, filters ``log/sp.log`` into
+``log/sp_filtered.log``, builds the call graph reachable from ``main`` and
+renders it to ``func_struc_demo.png``; functions resolved from the report's
+"Previously allocated frames" section are drawn in red.
+"""
+import re
+import subprocess
+from collections import defaultdict
+from collections import deque
+from functools import lru_cache
+
+import graphviz
+
+# preprocess_log() writes this file; parse_log() and calculate_log() read it.
+FILTERED_LOG = 'log/sp_filtered.log'
+
+# "<address> <symbol>:" line that opens a function in the disassembly log.
+FUNC_HEADER = re.compile(r'^[0-9a-f]+ <([_a-zA-Z0-9]+)>:')
+
+# read the file
+with open("hwasan-sp", "r", encoding='utf-8') as f:
+    lines = f.readlines()
+
+current_line = 0
+hwasan_content = {
+    'module_name': '',
+    'error_info': '',
+    'detail': '',
+    'call_stack': [],
+    'allocated_frames': [],
+}
+
+# module name ('Modeul name:' is the spelling originally matched; kept)
+while current_line < len(lines):
+    line = lines[current_line]
+    if line.startswith(('Modeul name:', 'Module name:')):
+        hwasan_content['module_name'] = line.split(':', 1)[1].strip()
+        current_line += 1
+        break
+    current_line += 1
+
+# error
+error_pattern = r'ERROR:(.*)'
+
+while current_line < len(lines):
+    line = lines[current_line]
+    match = re.search(error_pattern, line)
+    if match:
+        hwasan_content['error_info'] = match.group(1).strip()
+        current_line += 1
+        break
+    current_line += 1
+
+# detail: the line following the ERROR line, if the report has one
+if current_line < len(lines):
+    hwasan_content['detail'] = lines[current_line].strip()
+    current_line += 1
+
+# stack: the run of lines that start with '#'
+while (current_line < len(lines)
+       and lines[current_line].strip().startswith('#')):
+    hwasan_content['call_stack'].append(tuple(lines[current_line].split()))
+    current_line += 1
+
+# allocated frames: "record_addr" lines right after the section header
+while current_line < len(lines):
+    line = lines[current_line]
+    if line.startswith('Previously allocated frames:'):
+        current_line += 1
+
+        while (current_line < len(lines)
+               and lines[current_line].strip().startswith("record_addr")):
+            hwasan_content['allocated_frames'].append(
+                tuple(lines[current_line].strip().split()))
+            current_line += 1
+        break
+    current_line += 1
+
+
+# extract log
+@lru_cache(maxsize=None)
+def demangle(name):
+    """Return *name* demangled by ``c++filt``, or *name* itself on failure.
+
+    Cached, because every uncached call spawns a ``c++filt`` process.
+    """
+    try:
+        result = subprocess.run(
+            ['c++filt', '-n', name],
+            capture_output=True,
+            text=True,
+        )
+    except OSError:
+        # c++filt is missing or could not be started.
+        return name
+    demangled = result.stdout.strip()
+    if result.returncode != 0 or not demangled:
+        return name
+    return demangled
+
+
+def preprocess_log(input_filename, output_filename):
+    """Write function headers and bl/sub/add/adr lines to *output_filename*.
+
+    A function whose demangled name starts with ``__`` is dropped together
+    with its instructions.
+    """
+    opcode = re.compile(r'\b(bl|sub|add|adr)\b')
+
+    with open(input_filename, 'r') as f_in:
+        with open(output_filename, 'w') as f_out:
+            current_func = None
+            for line in f_in:
+                line = line.strip()
+
+                # func header
+                m_func = FUNC_HEADER.match(line)
+                if m_func:
+                    current_func = m_func.group(1)
+                    if demangle(current_func).startswith('__'):
+                        current_func = None  # skip system funcs
+                        continue
+
+                    f_out.write(line + '\n')
+                    continue
+                if current_func and opcode.search(line):
+                    f_out.write(line + '\n')
+
+
+def parse_log(filename):
+    """Return ``{caller: [callee, ...]}`` from the ``bl`` lines of *filename*.
+
+    Names are demangled; a callee appears once per call site.
+    """
+    # \s* accepts both "addr<sym>" and the "addr <sym>" spelling.
+    bl_call = re.compile(r'\bbl\s+[0-9a-fx]+\s*<([_a-zA-Z0-9]+)>')
+    # key: func name, value: callee list
+    call_graph = defaultdict(list)
+    current_func = None
+
+    with open(filename, 'r') as f:
+        for line in f:
+            line = line.strip()
+
+            m_func = FUNC_HEADER.match(line)
+            if m_func:
+                current_func = m_func.group(1)
+                continue
+
+            if current_func:
+                m_call = bl_call.search(line)
+                if m_call:
+                    callee = m_call.group(1)
+                    call_graph[demangle(current_func)].append(demangle(callee))
+    return call_graph
+
+
+def filter_graph_by_main(graph):
+    """Return the part of *graph* reachable from ``main`` (breadth-first).
+
+    Each function keeps only the callees that were not yet visited when it
+    was expanded; the result is ``{}`` when ``main`` is not a key of *graph*.
+    """
+    if 'main' not in graph:
+        return {}
+
+    visited = set()
+    queue = deque(['main'])
+    filtered_graph = {}
+
+    while queue:
+        current_func = queue.popleft()
+
+        if current_func in visited:
+            continue
+
+        visited.add(current_func)
+
+        if current_func in graph and len(graph[current_func]) > 0:
+            filtered_graph[current_func] = []
+            for callee in graph[current_func]:
+                if callee not in visited:
+                    filtered_graph[current_func].append(callee)
+                    queue.append(callee)
+
+    return filtered_graph
+
+
+# file path
+preprocess_log('log/sp.log', FILTERED_LOG)
+graph = parse_log(FILTERED_LOG)
+g = filter_graph_by_main(graph)
+
+
+# generate func structure
+class FunctionInfo:
+    """One allocated frame: its function plus the derived fp/sp values."""
+
+    def __init__(self, offset=None, name=None, record=None, fp=None, sp=None,
+                 pre_sp=None, child=None):
+        self.offset = offset
+        self.name = name
+        self.record = record
+        self.fp = fp
+        self.sp = sp
+        self.pre_sp = pre_sp
+        self.child = child if child is not None else []
+
+
+# search for sp according to the offset
+def calculate_log(file_path, offset):
+    """Find the stack adjustments of the function that contains *offset*.
+
+    Returns ``(name, sub, add)``: the demangled function name, the immediate
+    of its ``sub sp, sp, #N`` (0 if none was seen first) and the immediate of
+    its ``add x29, sp, #N``.  Returns ``None`` when no line starts with
+    *offset* or the function has no ``add x29, sp, #N``.
+    """
+    pattern_sub = re.compile(r'.*sub\s+sp,\s*sp,\s*#(\d+)$')
+    pattern_add = re.compile(r'.*add\s+x29,\s*sp,\s*#(\d+)$')
+    current_func = None
+    search_idx = 0
+    with open(file_path, 'r') as f:
+        lines = f.readlines()
+    for idx, line in enumerate(lines):
+        line = line.strip()
+
+        tmp_func = FUNC_HEADER.match(line)
+        if tmp_func:
+            current_func = tmp_func.group(1)
+            search_idx = idx + 1
+            continue
+        if current_func is None or not line.startswith(offset):
+            continue
+        # Scan the enclosing function from its first instruction; stop at a
+        # blank line, the next function header or the end of the file.
+        sub = 0
+        while search_idx < len(lines):
+            candidate = lines[search_idx].strip()
+            if not candidate or FUNC_HEADER.match(candidate):
+                break
+            sub_match = pattern_sub.search(candidate)
+            add_match = pattern_add.search(candidate)
+            if sub_match:
+                sub = int(sub_match.group(1))
+            if add_match:
+                return demangle(current_func), sub, int(add_match.group(1))
+            search_idx += 1
+    return None
+
+
+# init func
+functions = []
+for frame in hwasan_content['allocated_frames'][::-1]:
+    record = frame[1]
+    path = frame[2]
+    # get the offset
+    plus_pos = path.rfind('+')
+    offset = path[plus_pos + 3: plus_pos + 8]
+    located = calculate_log(FILTERED_LOG, offset)
+    if located is None:
+        continue  # frame not found in the filtered log
+    name, sub, add = located
+    fp = record[7:13] + '0'
+    sp = hex(int(fp, 16) - int(add))
+    pre_sp = hex(int(sp, 16) + int(sub))
+    func = FunctionInfo(offset=offset, name=name, record=record, fp=fp,
+                        sp=sp, pre_sp=pre_sp)
+    functions.append(func)
+
+# generate function structure
+stack = []
+for func in functions:
+    current_sp = int(func.pre_sp, 16)
+    while stack and current_sp >= int(stack[-1].pre_sp, 16):
+        stack.pop()
+    if stack:
+        stack[-1].child.append(func)
+    stack.append(func)
+
+
+def build_call_tree(call_data):
+    """Return ``(dot, all_nodes)`` built from ``{parent: [child, ...]}``."""
+    dot = graphviz.Digraph()
+    all_nodes = set()
+
+    for parent, children in call_data.items():
+        all_nodes.add(parent)
+        for child in children:
+            all_nodes.add(child)
+            dot.edge(parent, child)
+
+    for node in all_nodes:
+        dot.node(node)
+    return dot, all_nodes
+
+
+# highlight the subset
+def highlight_subset(dot, subset_data, all_nodes):
+    """Restyle every node of *subset_data* that is present in *all_nodes*."""
+    for node in subset_data:
+        if node in all_nodes:
+            # fillcolor is only drawn for filled nodes, and it must be a
+            # colour name Graphviz knows ("lightred" is not one).
+            dot.node(node, color='red', style='filled,bold',
+                     fillcolor='lightcoral')
+
+
+def visualize_call_tree(call_data, subset_data=None):
+    """Build the graph for *call_data*, highlighting *subset_data* if given."""
+    dot, all_nodes = build_call_tree(call_data)
+    if subset_data:
+        highlight_subset(dot, subset_data, all_nodes)
+    return dot
+
+
+call_data = g
+subset_data = [func.name for func in functions]
+
+dot_graph = visualize_call_tree(call_data, subset_data)
+dot_graph.render('func_struc_demo', format='png', cleanup=True)
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
index 149c5ef4e49392c5d1769b1305e8ad92f71834eb..dc4e60011f64fd6d16f24bd74d657aa9d0337ed8 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include "llvm/ADT/Optional.h"
 
 namespace llvm {
 
@@ -81,6 +82,9 @@ public:
     return Die->hasChildren();
   }
 
+  /// Returns the offset of a constant data member if it exists.
+  llvm::Optional<uint64_t> getConstDataMemberOffset() const;
+
   /// Returns true for a valid DIE that terminates a sibling chain.
bool isNULL() const { return getAbbreviationDeclarationPtr() == nullptr; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 15a2d23c4fd2a34e3832acd1478f1a18bde84aca..bce8cff5c724fa535f5f858954062825d688e795 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -774,3 +774,14 @@ void dumpTypeUnqualifiedName(const DWARFDie &DIE, raw_ostream &OS, } } // namespace llvm + +//return constant data-member offset if one exists +llvm::Optional DWARFDie::getConstDataMemberOffset() const { + if (getTag() != dwarf::DW_TAG_member) + return llvm::None; + if (auto Val = find(dwarf::DW_AT_data_member_location)){ + if (llvm::Optional Cst = Val->getAsUnsignedConstant()) + return Cst; + } + return llvm::None; +} \ No newline at end of file diff --git a/llvm/tools/llvm-objdump/SourcePrinter.cpp b/llvm/tools/llvm-objdump/SourcePrinter.cpp index c8ea6b543245a13a44d19ac86653f762c824ffe5..4b9dc8f0ebfa6df3577f70529179904b10dc7e6a 100644 --- a/llvm/tools/llvm-objdump/SourcePrinter.cpp +++ b/llvm/tools/llvm-objdump/SourcePrinter.cpp @@ -20,6 +20,11 @@ #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/DataExtractor.h" #define DEBUG_TYPE "objdump" @@ -30,9 +35,48 @@ unsigned getInstStartColumn(const MCSubtargetInfo &STI) { return !ShowRawInsn ? 16 : STI.getTargetTriple().isX86() ? 
40 : 24; } -bool LiveVariable::liveAtAddress(object::SectionedAddress Addr) { - if (LocExpr.Range == None) +static DWARFDie getTopFunctionDie(DWARFDie Die) { + while (Die && Die.getTag() != dwarf::DW_TAG_subprogram) + Die = Die.getParent(); + return Die; +} +static bool tryFoldOffset(SmallVectorImpl &Bytes,uint64_t MemberOff){ + if (Bytes.empty()) + return false; + + uint8_t Op = Bytes[0]; + bool IsFbreg = (Op == dwarf::DW_OP_fbreg); + bool IsBreg = (Op >= dwarf::DW_OP_breg0 && Op <= (dwarf::DW_OP_breg31)); + if(!IsFbreg && !IsBreg) + return false; + + //read original SLEB128 + const uint8_t *Ptr = Bytes.data() + 1; + unsigned N = 0; + int64_t BaseOff = llvm:decodeSLEB128(Ptr, &N, nullptr); + + if (1+N != Bytes.size()) return false; + + int64_t NewOff = BaseOff + static_cast(MemberOff); + + //recode opcode + new SLEB128 + SmallVector Newbytes; + Newbytes.push_back(Op); + + uint8_t Tmp[16]; + unsigned Sz = llvm::encodeSLEB128(NewOff, Tmp, 0); + Newbytes.append(Tmp, Tmp + Sz); + + Bytes.assign(Newbytes.begin(), Newbytes.end()); + return true; +} + +bool LiveVariable::liveAtAddress(object::SectionedAddress Addr) { + // if (LocExpr.Range == None) + // return false; + if(!LocExpr) + return true; return LocExpr.Range->SectionIndex == Addr.SectionIndex && LocExpr.Range->LowPC <= Addr.Address && LocExpr.Range->HighPC > Addr.Address; @@ -43,6 +87,63 @@ void LiveVariable::print(raw_ostream &OS, const MCRegisterInfo &MRI) const { Unit->getContext().isLittleEndian(), 0); DWARFExpression Expression(Data, Unit->getAddressByteSize()); Expression.printCompact(OS, MRI); + + if (MemberOffset) + OS << format(" +0x%X", MemberOffset); +} + +void LiveVariablePrinter::addAggregateMembers( + const DWARFDie &FuncDie, + const DWARFDie &VarDie, + StringRef BaseName, + const DWARFLocationExpressionsVector &BaseLocs){ + DWARFUnit *U = VarDie.getDwarfUnit(); + if(!U) + return; + DWARFDie Ty = VarDie.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); // get variable type + if (!Ty) + return; + 
unsigned Tag = Ty.getTag(); + if(Tag != dwarf::DW_TAG_structure_type && + Tag != dwarf::DW_TAG_class_type && + Tag != dwarf::DW_TAG_union_type && + Tag != dwarf::DW_TAG_array_type) + return; + + // list all the members + for(DWARFDie Child : Ty.children()){ + if (Child.getTag() != dwarf::DW_TAG_member) + continue; + auto Offset = Child.getConstDataMemberOffset(); + if (!Offset) + continue; + // get member's fullname + SmallString<32> FullName(BaseName); + FullName += "."; + FullName += Child.getName(DINameKind::ShortName); + + // copy from parent's location expression + for (const auto &ParentLoc :BaseLocs){ + DWARFLocationExpression MemberLoc = ParentLoc; + + SmallVector Bytes(ParentLoc.Expr.begin(), ParentLoc.Expr.end()); + + if (!tryFoldOffset(Bytes, *Offset)){ + Bytes.push_back(dwarf::DW_OP_plus_uconst); + uint8_t Tmp[16]; + unsigned Sz = llvm::encodeULEB128(*Offset, Tmp, 0); + Bytes.append(Tmp, Tmp + Sz); + } + + MemberLoc.Expr.assign(Bytes.begin(), Bytes.end()); + MemberLoc.Range = ParentLoc.Range; + + DWARFDie TopFunc = getTopFunctionDie(FuncDie); + LiveVariables.emplace_back(MemberLoc, strdup(FullName.c_str()), U, TopFunc, *Offset); + } + addAggregateMembers(FuncDie, Child, FullName, BaseLocs); + } + } } void LiveVariablePrinter::addVariable(DWARFDie FuncDie, DWARFDie VarDie) { @@ -74,6 +175,7 @@ void LiveVariablePrinter::addVariable(DWARFDie FuncDie, DWARFDie VarDie) { LiveVariables.emplace_back(WholeFuncExpr, VarName, U, FuncDie); } } + addAggregateMembers(FuncDie, VarDie, VarName, *Locs); } void LiveVariablePrinter::addFunction(DWARFDie D) { diff --git a/llvm/tools/llvm-objdump/SourcePrinter.h b/llvm/tools/llvm-objdump/SourcePrinter.h index 29ef19c98c803e27cee66f2dfd5b35f30062be46..ab1e03156aa7f10962d3f9be85e6b2e8b5eacddb 100644 --- a/llvm/tools/llvm-objdump/SourcePrinter.h +++ b/llvm/tools/llvm-objdump/SourcePrinter.h @@ -28,10 +28,11 @@ struct LiveVariable { const char *VarName; DWARFUnit *Unit; const DWARFDie FuncDie; + uint64_t MemberOffset = 0; // 
set the offset of top member variable to 0 LiveVariable(const DWARFLocationExpression &LocExpr, const char *VarName, - DWARFUnit *Unit, const DWARFDie FuncDie) - : LocExpr(LocExpr), VarName(VarName), Unit(Unit), FuncDie(FuncDie) {} + DWARFUnit *Unit, const DWARFDie FuncDie, uint64_t MemberOffset = 0) + : LocExpr(LocExpr), VarName(VarName), Unit(Unit), FuncDie(FuncDie), MemberOffset(MemberOffset) {} bool liveAtAddress(object::SectionedAddress Addr); @@ -85,6 +86,10 @@ public: void dump() const; + void addAggregateMembers(const DWARFDie &FuncDie, const DWARFDie &VarDie, + StringRef BaseName, + const DWARFLocationExpressionsVector &BaseLocs); + void addCompileUnit(DWARFDie D); /// Update to match the state of the instruction between ThisAddr and