diff --git a/systrace/.clang-format b/systrace/.clang-format new file mode 100644 index 0000000000000000000000000000000000000000..466d8df6a1d2760cdbcf4a051bb4edba51530154 --- /dev/null +++ b/systrace/.clang-format @@ -0,0 +1,5 @@ +BasedOnStyle: LLVM +IndentWidth: 4 +BreakBeforeBraces: Allman +UseTab: Never +TabWidth: 4 diff --git a/systrace/CMakeLists.txt b/systrace/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..4856e4521330306949be0053c0c38c80745478c1 --- /dev/null +++ b/systrace/CMakeLists.txt @@ -0,0 +1,70 @@ +cmake_minimum_required(VERSION 3.10) +project(sysTrace) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) +set(CMAKE_SKIP_RPATH TRUE) +set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) +set(CMAKE_INSTALL_RPATH "") +set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE) +if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64") + set(UNWIND_LIB "unwind-aarch64") + set(MSPTI_INCLUDE "${PROJECT_SOURCE_DIR}/thirdparty/aarch64/mspti/include") + set(MSPTI_LIB "${PROJECT_SOURCE_DIR}/thirdparty/aarch64/mspti/lib64") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64") + set(UNWIND_LIB "unwind") + set(MSPTI_INCLUDE "${PROJECT_SOURCE_DIR}/thirdparty/x86_64/mspti/include") + set(MSPTI_LIB "${PROJECT_SOURCE_DIR}/thirdparty/x86_64/mspti/lib64") +else() + message(WARNING "Unknown architecture: ${CMAKE_SYSTEM_PROCESSOR}") + set(UNWIND_LIB "unwind") +endif() +include_directories( + ${MSPTI_INCLUDE} +) + +find_package(Python3 REQUIRED COMPONENTS Development) +find_package(Protobuf REQUIRED) +find_package(Threads REQUIRED) + +find_library(ASCEND_MSPTI + NAMES mspti + HINTS ${MSPTI_LIB} +) +if(NOT ASCEND_MSPTI) + message(FATAL_ERROR "Ascend mspti library not found!") +endif() + +add_library(common STATIC + ${PROJECT_SOURCE_DIR}/include/common/logging.cc + ${PROJECT_SOURCE_DIR}/include/common/util.cc +) +target_include_directories(common PUBLIC ${PROJECT_SOURCE_DIR}/include ${Python3_INCLUDE_DIRS}) + + 
+add_subdirectory(protos) + +add_library(sysTrace_hook SHARED + ${PROJECT_SOURCE_DIR}/src/trace/systrace_manager.cc + ${PROJECT_SOURCE_DIR}/src/trace/library_loader.cc + ${PROJECT_SOURCE_DIR}/src/trace/python/pytorch_tracing_loader.cc + ${PROJECT_SOURCE_DIR}/src/trace/python/pytorch_tracing_manager.cc + ${PROJECT_SOURCE_DIR}/src/trace/python/pytorch_tracing.c + ${PROJECT_SOURCE_DIR}/src/ascend/hook.cc + ${PROJECT_SOURCE_DIR}/src/mspti/mspti_tracker.cpp + ${PROJECT_SOURCE_DIR}/src/cann/cann_hook.c +) + +set_target_properties(sysTrace_hook PROPERTIES OUTPUT_NAME "sysTrace") + +target_link_libraries(sysTrace_hook + common + general_pb2 + ${Python3_LIBRARIES} + protobuf::libprotobuf + ${CMAKE_THREAD_LIBS} + pthread + jsoncpp + -ldl +) diff --git a/systrace/build.sh b/systrace/build.sh new file mode 100644 index 0000000000000000000000000000000000000000..3040c97c7af451e8fb35e834b1f0b20a0cb8be67 --- /dev/null +++ b/systrace/build.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +sudo dnf remove -y libunwind libunwind-devel 2>/dev/null || true +mkdir -p build + +cd protos +protoc --c_out=. systrace.proto +protoc --cpp_out=. systrace.proto +protoc --python_out=. systrace.proto +cd .. +cd build +cmake .. +make -j $(nproc) diff --git a/systrace/convert/convert_json2csv.py b/systrace/convert/convert_json2csv.py new file mode 100644 index 0000000000000000000000000000000000000000..55b8ea24f25b8592ae894929788089ae7c671ec1 --- /dev/null +++ b/systrace/convert/convert_json2csv.py @@ -0,0 +1,55 @@ +# coding=utf-8 +""" +Copyright (c) Huawei Technologies Co., Ltd. 2020-2028. All rights reserved. 
def _csv_path_for(json_path):
    """Return the ``.csv`` path that sits next to *json_path*.

    Only the final extension is replaced, so ``hccl_activity.3.json`` maps to
    ``hccl_activity.3.csv``.
    """
    stem, _ = os.path.splitext(json_path)
    return f"{stem}.csv"


def convert_json2csv(json_path):
    """Flatten one JSON dump into a CSV file written beside it.

    The conversion is skipped when the CSV already exists. Back-to-back JSON
    arrays separated by a newline (``]\\n[``) are joined into a single array
    before parsing. Nested keys are flattened with ``_`` as separator.

    Args:
        json_path: Path of the JSON file to convert.

    Returns:
        None. Read/parse failures are logged and the file is skipped.
    """
    csv_path = _csv_path_for(json_path)
    if os.path.exists(csv_path):
        return

    try:
        with open(json_path, 'r', encoding='utf-8') as file:
            content = file.read()
        content = content.replace(']\n[', ',').strip()
        json_data = json.loads(content)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # anything else (e.g. KeyboardInterrupt) must propagate.
        logger.error(f"json data read error: {json_path}: {exc}")
        return

    if not json_data:
        return

    df = pd.json_normalize(json_data, sep='_')
    logger.info(f"save path: {csv_path}")
    df.to_csv(csv_path, index=False)


def convert_jsons2csv(root_path):
    """Convert every ``*.json`` file directly inside *root_path* to CSV.

    Args:
        root_path: Directory to scan (not recursive).
    """
    # Match the real extension; a bare "json" suffix would also pick up
    # unrelated names such as "datajson".
    json_files = sorted(name for name in os.listdir(root_path) if name.endswith(".json"))

    for json_file in json_files:
        logger.info(f"{json_file}")
        convert_json2csv(os.path.join(root_path, json_file))


if __name__ == "__main__":
    import sys

    # The directory may be passed on the command line; the previous
    # hard-coded location remains the default.
    convert_jsons2csv(sys.argv[1] if len(sys.argv) > 1 else "./data/json_tp4dp1")
class FixedFlameGraphConverter:
    """Turn a serialized ``ProcMem`` message into Chrome-trace "X" events.

    The time axis is synthetic: every allocation occupies ``mem_size`` time
    units, so the width of a bar equals the number of bytes it allocated.
    Stages are laid out one after another in the order in which they first
    appear in the allocation records.
    """

    # Seconds granted to one ``addr2line`` call before the raw address is used.
    ADDR2LINE_TIMEOUT = 0.05
    # Directories probed first when locating a shared object.
    DEFAULT_LIB_DIRS = ("/usr/lib", "/usr/local/lib", "/lib")
    # Probed last (after LD_LIBRARY_PATH and PATH) so existing hits keep priority.
    EXTRA_LIB_DIRS = ("/usr/lib64", "/usr/local/lib64", "/lib64")
    # Version suffixes tried after ``lib<name>.so``.
    SO_VERSION_SUFFIXES = ("", ".1", ".2", ".3", ".4", ".5")

    def __init__(self):
        self.stage_names = {
            StageType.STAGE_UNKNOWN: "UNKNOWN",
            StageType.STAGE_DATALOADER: "DATALOADER",
            StageType.STAGE_FORWARD: "FORWARD",
            StageType.STAGE_BACKWARD: "BACKWARD",
            StageType.STAGE_SYNCHRONIZATION: "SYNCHRONIZATION",
            getattr(StageType, "STAGE_GC", 5): "GC"
        }
        self.symbol_cache = {}
        self.so_path_cache = {}
        # Path of the protobuf being converted; used to name the output file
        # when the caller passes a directory as output.
        self._input_path = None

    def convert(self, input_pb, output_json):
        """Convert *input_pb* and write the trace JSON to *output_json*.

        Args:
            input_pb: Path of the serialized ``ProcMem`` message.
            output_json: Output file, or an existing directory in which
                ``<input stem>_fixed_flamegraph.json`` is created.
        """
        self._input_path = input_pb
        proc_mem = self._load_proc_mem(input_pb)
        alloc_groups = self._analyze_allocations(proc_mem)
        self._precache_symbols(alloc_groups)
        trace_events = self._build_trace_events(proc_mem.pid, alloc_groups)
        self._save_json(output_json, trace_events)

    def _build_trace_events(self, pid, alloc_groups):
        """Lay out one container event plus merged call events per stage."""
        # Group by display name, remembering the key that introduced the name
        # so the container metadata describes its own stage.
        stage_data = {}
        for (stage_type, stage_id), allocs in alloc_groups.items():
            stage_name = f"{stage_id}_{self.stage_names.get(stage_type, 'UNKNOWN')}"
            entry = stage_data.setdefault(
                stage_name, {"key": (stage_type, stage_id), "allocs": []})
            entry["allocs"].extend(allocs)

        trace_events = []
        global_timestamp = 0
        for stage_name, entry in stage_data.items():
            stage_type, stage_id = entry["key"]
            allocs = entry["allocs"]
            min_ts = global_timestamp
            max_ts = min_ts + sum(a.mem_size for a in allocs)

            # The container goes first so it is rendered above its calls.
            stage_events = [{
                "name": stage_name,
                "ph": "X",
                "ts": min_ts,
                "dur": max_ts - min_ts,
                "pid": pid,
                "tid": pid,
                "args": {
                    "stage_type": self.stage_names.get(stage_type, "UNKNOWN"),
                    "stage_id": stage_id,
                    "is_container": True
                }
            }]

            current_ts = min_ts
            for alloc in allocs:
                alloc_events, alloc_duration = self._process_allocation(alloc, pid, current_ts)
                stage_events.extend(alloc_events)
                current_ts += alloc_duration

            trace_events.extend(self._merge_calls(stage_events, stage_name))
            global_timestamp = max_ts
        return trace_events

    def _merge_calls(self, events, stage_name):
        """Merge adjacent calls with the same depth and name inside one stage.

        Two calls are merged when the later one starts exactly where the
        earlier one ends. The input events are left untouched.

        Returns:
            The container event (if any) followed by the merged calls sorted
            by ``ts``.
        """
        containers = [e for e in events if e.get("args", {}).get("is_container")]
        calls = [e for e in events if not e.get("args", {}).get("is_container")]

        call_groups = defaultdict(list)
        for event in calls:
            call_groups[(event["args"]["depth"], event["name"])].append(event)

        def fresh(event):
            # Copy ``args`` too, otherwise accumulating bytes would edit the input.
            return {**event, "args": dict(event["args"])}

        merged_calls = []
        for group in call_groups.values():
            group.sort(key=lambda e: e["ts"])
            current = fresh(group[0])
            ptrs = [current["args"]["alloc_ptr"]]
            for event in group[1:]:
                if event["ts"] == current["ts"] + current["dur"]:
                    current["dur"] += event["dur"]
                    current["args"]["bytes"] += event["args"]["bytes"]
                    ptrs.append(event["args"]["alloc_ptr"])
                    continue
                current["args"]["alloc_ptr"] = ",".join(ptrs)
                merged_calls.append(current)
                current = fresh(event)
                ptrs = [current["args"]["alloc_ptr"]]
            current["args"]["alloc_ptr"] = ",".join(ptrs)
            merged_calls.append(current)

        return containers[:1] + sorted(merged_calls, key=lambda e: e["ts"])

    def _process_allocation(self, alloc, pid, base_ts):
        """Emit one event per stack level for a single allocation.

        Depth 0 is the synthetic ``[root]`` node; each stack frame follows at
        increasing depth. Every event starts at *base_ts* and lasts
        ``alloc.mem_size`` units.

        Returns:
            ``(events, duration)`` where duration is ``alloc.mem_size``.
        """
        alloc_duration = alloc.mem_size
        names = ["[root]"]
        for frame in alloc.stack_frames:
            names.append(self._resolve_symbol(os.path.basename(frame.so_name), frame.address))

        call_events = [
            {
                "name": name,
                "ph": "X",
                "ts": base_ts,
                "dur": alloc_duration,
                "pid": pid,
                "tid": pid,
                "args": {
                    "depth": depth,
                    "bytes": alloc.mem_size,
                    "alloc_ptr": f"0x{alloc.alloc_ptr:x}"
                }
            }
            for depth, name in enumerate(names)
        ]
        return call_events, alloc_duration

    def _load_proc_mem(self, path):
        """Parse the ``ProcMem`` message stored at *path*."""
        with open(path, "rb") as f:
            proc_mem = ProcMem()
            proc_mem.ParseFromString(f.read())
        return proc_mem

    def _analyze_allocations(self, proc_mem):
        """Group every allocation record by ``(stage_type, stage_id)``.

        Freed allocations are intentionally kept as well.
        """
        grouped = defaultdict(list)
        for alloc in proc_mem.mem_alloc_stacks:
            grouped[(alloc.stage_type, alloc.stage_id)].append(alloc)
        return grouped

    def _precache_symbols(self, alloc_groups):
        """Resolve every distinct frame up front using a thread pool."""
        unique_frames = set()
        for allocs in alloc_groups.values():
            for alloc in allocs:
                for frame in alloc.stack_frames:
                    unique_frames.add((os.path.basename(frame.so_name), frame.address))
        if not unique_frames:
            return
        # A pool per call (closed by ``with``) keeps the converter reusable and
        # releases the workers even when resolution raises.
        with ThreadPoolExecutor(max_workers=os.cpu_count() or 4) as pool:
            list(pool.map(lambda frame: self._resolve_symbol(*frame), unique_frames))

    def _resolve_symbol(self, so_name, address):
        """Return ``<so>@<function>``, or ``<so>@0x<address>`` when unresolved."""
        cache_key = f"{so_name}:{address:x}"
        cached = self.symbol_cache.get(cache_key)
        if cached is not None:
            return cached

        symbol = f"{so_name}@0x{address:x}"
        so_path = self._find_so_path(so_name)
        if so_path:
            try:
                result = subprocess.run(
                    ["addr2line", "-e", so_path, "-f", "-C", "-p", f"0x{address:x}"],
                    capture_output=True, text=True, timeout=self.ADDR2LINE_TIMEOUT
                )
            except (OSError, subprocess.SubprocessError):
                # addr2line missing, not executable, or timed out.
                result = None
            if result is not None and result.returncode == 0:
                func_name = result.stdout.split(" at ")[0].split("(")[0].strip()
                # addr2line prints "??" for addresses it cannot map.
                if func_name and not func_name.startswith("??"):
                    symbol = f"{so_name}@{func_name}"

        self.symbol_cache[cache_key] = symbol
        return symbol

    def _find_so_path(self, so_name):
        """Locate *so_name* on disk, caching hits and misses alike."""
        if so_name in self.so_path_cache:
            return self.so_path_cache[so_name]
        found = self._locate_library(so_name)
        self.so_path_cache[so_name] = found
        return found

    def _locate_library(self, so_name):
        """Search the library directories for *so_name*; None when absent."""
        if os.path.isabs(so_name) and os.path.exists(so_name):
            return so_name

        base_name = os.path.basename(so_name)
        candidates = [base_name]
        if base_name.startswith("lib") and ".so" in base_name:
            lib_prefix = base_name.split(".so")[0]
            candidates.extend(f"{lib_prefix}.so{ext}" for ext in self.SO_VERSION_SUFFIXES)

        search_paths = [
            *self.DEFAULT_LIB_DIRS,
            *os.getenv("LD_LIBRARY_PATH", "").split(":"),
            *os.getenv("PATH", "").split(":"),
            *self.EXTRA_LIB_DIRS,
        ]
        for directory in filter(os.path.isdir, search_paths):
            for candidate in candidates:
                test_path = os.path.join(directory, candidate)
                if os.path.exists(test_path):
                    return test_path
        return None

    def _save_json(self, path, trace_events):
        """Write the events, sorted by ``ts``, as a Chrome-trace JSON file."""
        if os.path.isdir(path):
            # Prefer the path handed to convert(); fall back to argv for
            # callers that invoke this method directly from a script.
            source = getattr(self, "_input_path", None)
            if not source:
                source = sys.argv[1] if len(sys.argv) > 1 else "proc_mem"
            input_name = os.path.splitext(os.path.basename(source))[0]
            path = os.path.join(path, f"{input_name}_fixed_flamegraph.json")

        with open(path, "w", encoding="utf-8") as f:
            json.dump({
                "traceEvents": sorted(trace_events, key=lambda x: x["ts"]),
                "displayTimeUnit": "ns",
                "metadata": {
                    "format": "FixedFlameGraph",
                    "stage_order": list(self.stage_names.values())
                }
            }, f, indent=2)


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python proc_mem_converter.py input.pb output.json")
        sys.exit(1)
    FixedFlameGraphConverter().convert(sys.argv[1], sys.argv[2])
class FixedFlameGraphConverter:
    """Turn a serialized ``ProcMem`` message into a held-memory flame graph.

    Call events (``tid`` 2) use a synthetic time axis on which every
    allocation occupies ``mem_size`` units. Each stage additionally gets a
    container event (``tid`` 1) whose duration is the memory still held at the
    end of that stage divided by ``HELD_BYTES_PER_TIME_UNIT``.
    """

    # Seconds granted to one ``addr2line`` call before the raw address is used.
    ADDR2LINE_TIMEOUT = 0.05
    # Divisor applied to the held byte count to obtain the container duration.
    HELD_BYTES_PER_TIME_UNIT = 10000000
    # Stages whose name starts with one of these prefixes are not drawn.
    # NOTE(review): presumably warm-up iterations - confirm with the author.
    SKIPPED_STAGE_PREFIXES = ("0_", "1_", "2_")
    # Directories probed first when locating a shared object.
    DEFAULT_LIB_DIRS = ("/usr/lib", "/usr/local/lib", "/lib")
    # Probed last (after LD_LIBRARY_PATH and PATH) so existing hits keep priority.
    EXTRA_LIB_DIRS = ("/usr/lib64", "/usr/local/lib64", "/lib64")
    # Version suffixes tried after ``lib<name>.so``.
    SO_VERSION_SUFFIXES = ("", ".1", ".2", ".3", ".4", ".5")

    def __init__(self):
        self.stage_names = {
            StageType.STAGE_UNKNOWN: "UNKNOWN",
            StageType.STAGE_DATALOADER: "DATALOADER",
            StageType.STAGE_FORWARD: "FORWARD",
            StageType.STAGE_BACKWARD: "BACKWARD",
            StageType.STAGE_SYNCHRONIZATION: "SYNCHRONIZATION",
            getattr(StageType, "STAGE_GC", 5): "GC"
        }
        self.symbol_cache = {}
        self.so_path_cache = {}
        # Path of the protobuf being converted; used to name the output file
        # when the caller passes a directory as output.
        self._input_path = None

    def convert(self, input_pb, output_json):
        """Convert *input_pb* and write the trace JSON to *output_json*.

        Args:
            input_pb: Path of the serialized ``ProcMem`` message.
            output_json: Output file, or an existing directory in which
                ``<input stem>_fixed_flamegraph.json`` is created.
        """
        self._input_path = input_pb
        proc_mem = self._load_proc_mem(input_pb)
        alloc_groups = self._analyze_allocations(proc_mem)
        self._precache_symbols(alloc_groups)
        freed_by_stage = self._freed_bytes_by_stage(proc_mem)
        trace_events = self._build_trace_events(proc_mem.pid, alloc_groups, freed_by_stage)
        self._save_json(output_json, trace_events)

    def _freed_bytes_by_stage(self, proc_mem):
        """Sum freed bytes per ``(stage_type, stage_id)`` of the free record.

        The size of a free is taken from the allocation recorded for the same
        pointer; frees without a known allocation are ignored. When a pointer
        was allocated more than once, the last recorded size is used.
        """
        alloc_sizes = {alloc.alloc_ptr: alloc.mem_size for alloc in proc_mem.mem_alloc_stacks}
        freed = defaultdict(int)
        for free in proc_mem.mem_free_stacks:
            size = alloc_sizes.get(free.alloc_ptr)
            if size is not None:
                freed[(free.stage_type, free.stage_id)] += size
        return freed

    def _build_trace_events(self, pid, alloc_groups, freed_by_stage):
        """Lay out container and call events for every drawn stage."""
        # Group by display name and keep every key that maps to the name, so
        # frees and metadata are attributed to the right stage.
        stage_data = {}
        for key, allocs in alloc_groups.items():
            stage_type, stage_id = key
            stage_name = f"{stage_id}_{self.stage_names.get(stage_type, 'UNKNOWN')}"
            entry = stage_data.setdefault(stage_name, {"keys": [], "allocs": []})
            entry["keys"].append(key)
            entry["allocs"].extend(allocs)

        # Running balance of allocated minus freed bytes. Each stage's
        # allocation is counted exactly once; skipped stages still contribute.
        held_total = 0
        for entry in stage_data.values():
            allocated = sum(a.mem_size for a in entry["allocs"])
            freed = sum(freed_by_stage.get(key, 0) for key in entry["keys"])
            held_total += allocated - freed
            entry["allocated"] = allocated
            entry["freed"] = freed
            entry["held"] = max(held_total, 0)  # never report a negative amount

        trace_events = []
        current_ts = 0
        for stage_name, entry in stage_data.items():
            if stage_name.startswith(self.SKIPPED_STAGE_PREFIXES):
                continue

            stage_type, stage_id = entry["keys"][0]
            stage_events = [{
                "name": stage_name,
                "ph": "X",
                "ts": current_ts,
                "dur": entry["held"] / self.HELD_BYTES_PER_TIME_UNIT,
                "pid": pid,
                "tid": 1,
                "args": {
                    "stage_type": self.stage_names.get(stage_type, "UNKNOWN"),
                    "stage_id": stage_id,
                    "is_container": True,
                    "allocated": entry["allocated"],
                    "freed": entry["freed"],
                    "held": entry["held"]
                }
            }]

            alloc_ts = current_ts
            for alloc in entry["allocs"]:
                alloc_events, alloc_duration = self._process_allocation(alloc, pid, alloc_ts)
                stage_events.extend(alloc_events)
                alloc_ts += alloc_duration

            trace_events.extend(self._merge_calls(stage_events, stage_name))
            # The next stage starts after the bytes newly allocated here.
            current_ts = alloc_ts
        return trace_events

    def _merge_calls(self, events, stage_name):
        """Merge adjacent calls with the same depth and name inside one stage.

        Two calls are merged when the later one starts exactly where the
        earlier one ends. The input events are left untouched.

        Returns:
            The container event (if any) followed by the merged calls sorted
            by ``ts``.
        """
        containers = [e for e in events if e.get("args", {}).get("is_container")]
        calls = [e for e in events if not e.get("args", {}).get("is_container")]

        call_groups = defaultdict(list)
        for event in calls:
            call_groups[(event["args"]["depth"], event["name"])].append(event)

        def fresh(event):
            # Copy ``args`` too, otherwise accumulating bytes would edit the input.
            return {**event, "args": dict(event["args"])}

        merged_calls = []
        for group in call_groups.values():
            group.sort(key=lambda e: e["ts"])
            current = fresh(group[0])
            ptrs = [current["args"]["alloc_ptr"]]
            for event in group[1:]:
                if event["ts"] == current["ts"] + current["dur"]:
                    current["dur"] += event["dur"]
                    current["args"]["bytes"] += event["args"]["bytes"]
                    ptrs.append(event["args"]["alloc_ptr"])
                    continue
                current["args"]["alloc_ptr"] = ",".join(ptrs)
                merged_calls.append(current)
                current = fresh(event)
                ptrs = [current["args"]["alloc_ptr"]]
            current["args"]["alloc_ptr"] = ",".join(ptrs)
            merged_calls.append(current)

        return containers[:1] + sorted(merged_calls, key=lambda e: e["ts"])

    def _process_allocation(self, alloc, pid, base_ts):
        """Emit one event per stack level for a single allocation.

        Depth 0 is the synthetic ``[root]`` node; each stack frame follows at
        increasing depth. Every event starts at *base_ts* and lasts
        ``alloc.mem_size`` units.

        Returns:
            ``(events, duration)`` where duration is ``alloc.mem_size``.
        """
        alloc_duration = alloc.mem_size
        names = ["[root]"]
        for frame in alloc.stack_frames:
            names.append(self._resolve_symbol(os.path.basename(frame.so_name), frame.address))

        call_events = [
            {
                "name": name,
                "ph": "X",
                "ts": base_ts,
                "dur": alloc_duration,
                "pid": pid,
                "tid": 2,
                "args": {
                    "depth": depth,
                    "bytes": alloc.mem_size,
                    "alloc_ptr": f"0x{alloc.alloc_ptr:x}"
                }
            }
            for depth, name in enumerate(names)
        ]
        return call_events, alloc_duration

    def _load_proc_mem(self, path):
        """Parse the ``ProcMem`` message stored at *path*."""
        with open(path, "rb") as f:
            proc_mem = ProcMem()
            proc_mem.ParseFromString(f.read())
        return proc_mem

    def _analyze_allocations(self, proc_mem):
        """Group every allocation record by ``(stage_type, stage_id)``.

        Freed allocations are intentionally kept as well.
        """
        grouped = defaultdict(list)
        for alloc in proc_mem.mem_alloc_stacks:
            grouped[(alloc.stage_type, alloc.stage_id)].append(alloc)
        return grouped

    def _precache_symbols(self, alloc_groups):
        """Resolve every distinct frame up front using a thread pool."""
        unique_frames = set()
        for allocs in alloc_groups.values():
            for alloc in allocs:
                for frame in alloc.stack_frames:
                    unique_frames.add((os.path.basename(frame.so_name), frame.address))
        if not unique_frames:
            return
        # A pool per call (closed by ``with``) keeps the converter reusable and
        # releases the workers even when resolution raises.
        with ThreadPoolExecutor(max_workers=os.cpu_count() or 4) as pool:
            list(pool.map(lambda frame: self._resolve_symbol(*frame), unique_frames))

    def _resolve_symbol(self, so_name, address):
        """Return ``<so>@<function>``, or ``<so>@0x<address>`` when unresolved."""
        cache_key = f"{so_name}:{address:x}"
        cached = self.symbol_cache.get(cache_key)
        if cached is not None:
            return cached

        symbol = f"{so_name}@0x{address:x}"
        so_path = self._find_so_path(so_name)
        if so_path:
            try:
                result = subprocess.run(
                    ["addr2line", "-e", so_path, "-f", "-C", "-p", f"0x{address:x}"],
                    capture_output=True, text=True, timeout=self.ADDR2LINE_TIMEOUT
                )
            except (OSError, subprocess.SubprocessError):
                # addr2line missing, not executable, or timed out.
                result = None
            if result is not None and result.returncode == 0:
                func_name = result.stdout.split(" at ")[0].split("(")[0].strip()
                # addr2line prints "??" for addresses it cannot map.
                if func_name and not func_name.startswith("??"):
                    symbol = f"{so_name}@{func_name}"

        self.symbol_cache[cache_key] = symbol
        return symbol

    def _find_so_path(self, so_name):
        """Locate *so_name* on disk, caching hits and misses alike."""
        if so_name in self.so_path_cache:
            return self.so_path_cache[so_name]
        found = self._locate_library(so_name)
        self.so_path_cache[so_name] = found
        return found

    def _locate_library(self, so_name):
        """Search the library directories for *so_name*; None when absent."""
        if os.path.isabs(so_name) and os.path.exists(so_name):
            return so_name

        base_name = os.path.basename(so_name)
        candidates = [base_name]
        if base_name.startswith("lib") and ".so" in base_name:
            lib_prefix = base_name.split(".so")[0]
            candidates.extend(f"{lib_prefix}.so{ext}" for ext in self.SO_VERSION_SUFFIXES)

        search_paths = [
            *self.DEFAULT_LIB_DIRS,
            *os.getenv("LD_LIBRARY_PATH", "").split(":"),
            *os.getenv("PATH", "").split(":"),
            *self.EXTRA_LIB_DIRS,
        ]
        for directory in filter(os.path.isdir, search_paths):
            for candidate in candidates:
                test_path = os.path.join(directory, candidate)
                if os.path.exists(test_path):
                    return test_path
        return None

    def _save_json(self, path, trace_events):
        """Write the events, sorted by ``ts``, as a Chrome-trace JSON file."""
        if os.path.isdir(path):
            # Prefer the path handed to convert(); fall back to argv for
            # callers that invoke this method directly from a script.
            source = getattr(self, "_input_path", None)
            if not source:
                source = sys.argv[1] if len(sys.argv) > 1 else "proc_mem"
            input_name = os.path.splitext(os.path.basename(source))[0]
            path = os.path.join(path, f"{input_name}_fixed_flamegraph.json")

        with open(path, "w", encoding="utf-8") as f:
            json.dump({
                "traceEvents": sorted(trace_events, key=lambda x: x["ts"]),
                "displayTimeUnit": "ns",
                "metadata": {
                    "format": "FixedFlameGraph",
                    "stage_order": list(self.stage_names.values())
                }
            }, f, indent=2)


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python proc_mem_converter.py input.pb output.json")
        sys.exit(1)
    FixedFlameGraphConverter().convert(sys.argv[1], sys.argv[2])
# Maps the ``SourceKind`` column to the thread row shown in the trace viewer.
MODE = {
    0: "Host",
    1: "Device"
}
# Trace-viewer colour name per communication operator.
OP_COLORS = {
    'HcclAllreduce': "good",
    'HcclAllReduce': "good",
    'HcclAllGather': "bad",
    'HcclBroadcast': "yellow",
    'HcclReduceScatter': "olive",
    'HcclSend': "good",
    'HcclReceive': "good",
    'HcclBatchSendRecv': "thread_state_runnable"
}
# Order of the comma-separated fields that follow the "comm:" prefix of a name.
_COMM_FIELDS = ('comm_op', 'comm_group', 'data_type', 'count')


def create_args(row):
    """Build the ``args`` payload of one trace event from a row mapping."""
    return {
        "id": row["Id"],
        "comm_group": row["comm_group"],
        "count": row["count"]
    }


def split_df(df):
    """Split *df* by ``SourceKind`` into ``(host_rows, device_rows)``.

    Rows with ``SourceKind == 0`` are host rows, ``== 1`` device rows.
    """
    df_host = df[df['SourceKind'] == 0]
    df_device = df[df['SourceKind'] == 1]
    return df_host, df_device


def process_df(data_df, device_id, id2name_dict: dict):
    """Aggregate marker rows into one complete ("X") trace event per ``Id``.

    For each ``Id`` the smallest and largest ``Timestamp`` become start and
    end; both are divided by 1000 before being written as ``ts``/``dur``.
    The operator name is looked up in *id2name_dict* and split into operator,
    group, data type and count.

    Args:
        data_df: Rows of a single source kind. Not modified.
        device_id: Value used to build the ``rank_<device_id>`` process name.
        id2name_dict: Mapping from ``Id`` to the ``comm:...`` name string.

    Returns:
        A list of event dicts; empty when *data_df* has no rows.
    """
    if data_df.empty:
        return []

    # assign() returns a new frame, so the caller's (possibly sliced) frame is
    # never written to.
    named = data_df.assign(Name=data_df['Id'].map(id2name_dict))
    df = named.groupby('Id').agg({
        'Timestamp': ['min', 'max'],
        'Kind': 'first',
        'SourceKind': 'first',
        'Name': 'first',
    }).reset_index()
    df.columns = ['Id', 'start', 'end', 'Kind', 'SourceKind', 'Name']

    # Always produce exactly len(_COMM_FIELDS) columns: missing fields become
    # NaN and surplus commas stay in the last field.
    parts = (
        df['Name']
        .str.replace('comm:', '', regex=False)
        .str.split(',', n=len(_COMM_FIELDS) - 1, expand=True)
        .reindex(columns=range(len(_COMM_FIELDS)))
    )
    for position, column in enumerate(_COMM_FIELDS):
        df[column] = parts[position]
    df = df.drop(columns=['Name'])

    df['cat'] = "hccl"
    df['name'] = df['comm_op']
    df['cname'] = df['comm_op'].map(OP_COLORS)
    df['end'] = df['end'] / 1000.
    df['start'] = df['start'] / 1000.
    df['dur'] = df['end'] - df['start']
    df['ph'] = "X"
    df['pid'] = f"rank_{device_id}"
    df['tid'] = df["SourceKind"].map(MODE)
    df['args'] = [
        create_args(row)
        for row in df[['Id', 'comm_group', 'count']].to_dict(orient='records')
    ]

    records = df[['cat', 'name', 'ph', 'pid', 'tid', 'start', 'dur', 'cname', 'args']].rename(
        columns={'start': 'ts'}).to_dict(orient='records')
    for record in records:
        # An operator without a colour would otherwise be written as NaN,
        # which is not valid JSON.
        if pd.isna(record['cname']):
            del record['cname']
    return records


def process_files(root_path, debug: bool = False):
    """Build trace events from every non-"device" CSV file in *root_path*.

    Args:
        root_path: Directory containing the CSV files.
        debug: When true, only the first 12 rows of each file are used.

    Returns:
        Host events followed by device events for each file, files being
        visited in sorted name order.
    """
    csv_files = sorted(
        name for name in os.listdir(root_path)
        if name.endswith(".csv") and "device" not in name
    )
    all_ranks = []
    for csv_file in csv_files:
        print(f"start file: {csv_file}")
        df = pd.read_csv(os.path.join(root_path, csv_file))
        if debug:
            df = df.head(12)

        id2name_dict = df[df['Name'].notna()].set_index('Id')['Name'].to_dict()
        df_host, df_device = split_df(df)
        device_ids = df_device['msptiObjecId_Ds_DeviceId'].dropna().unique()
        if len(device_ids) == 0:
            # Without device rows the rank cannot be determined.
            print(f"skip file without device records: {csv_file}")
            continue
        device_id = device_ids[0]
        all_ranks.extend(process_df(df_host, device_id, id2name_dict))
        all_ranks.extend(process_df(df_device, device_id, id2name_dict))
    return all_ranks


def save_to_json(all_ranks, files_path):
    """Write the events to ``mspti_comm_ops_timeline.json`` in *files_path*."""
    output = {
        "traceEvents": all_ranks,
        "stackFrames": {}
    }
    target = os.path.join(files_path, 'mspti_comm_ops_timeline.json')
    with open(target, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=4)


def convert_mspti_timeline(data_path: str):
    """Convert the JSON dumps in *data_path* to CSV, then to one timeline file."""
    convert_jsons2csv(data_path)
    all_ranks = process_files(data_path)
    save_to_json(all_ranks, data_path)


if __name__ == "__main__":
    import sys

    # The data directory may be passed on the command line; the previous
    # hard-coded location remains the default.
    default_path = "D:\\startwork\\AOPS\\09-25年技术规划\\Code\\mspti_test-megatron-0224\\mspti_test-megatron-0224\\data\\log\\all_merge"
    convert_mspti_timeline(sys.argv[1] if len(sys.argv) > 1 else default_path)
def process_timeline_file(input_path, trace_data):
    """Append one trace event per stage of a serialized ``Pytorch`` message.

    Args:
        input_path: Path of the binary timeline file.
        trace_data: Dict whose ``"traceEvents"`` list is extended in place.
    """
    with open(input_path, "rb") as f:
        pytorch_data = systrace_pb2.Pytorch()
        pytorch_data.ParseFromString(f.read())

    rank = pytorch_data.rank
    for stage in pytorch_data.pytorch_stages:
        has_gc_debug = stage.HasField("gc_debug")
        trace_data["traceEvents"].append({
            "name": stage.stage_type,
            "cat": "pytorch",
            "ph": "X",
            "pid": rank,
            # Stages whose type contains "GC" get a row of their own.
            "tid": rank if "GC" not in stage.stage_type else f"{rank}:gc",
            "ts": stage.start_us,
            "dur": stage.end_us - stage.start_us,
            "args": {
                "stage_id": stage.stage_id,
                "comm": pytorch_data.comm,
                "stack_frames": list(stage.stack_frames),
                "gc_collected": stage.gc_debug.collected if has_gc_debug else 0,
                "gc_uncollectable": stage.gc_debug.uncollectable if has_gc_debug else 0
            }
        })


def aggregate_timeline_files(output_path, input_pattern="*timeline"):
    """Merge every timeline file matching *input_pattern* into one JSON file.

    Args:
        output_path: Destination JSON file.
        input_pattern: Glob pattern selecting the input files. The default
            matches ``*timeline`` in the current working directory.
    """
    trace_data = {
        "traceEvents": [],
        "displayTimeUnit": "ns",
        "metadata": {"format": "Pytorch Profiler"}
    }

    # Sorted so the result does not depend on directory listing order.
    for timeline_file in sorted(glob.glob(input_pattern)):
        print(f"Processing {timeline_file}")
        process_timeline_file(timeline_file, trace_data)

    trace_data["traceEvents"].sort(key=lambda event: event["args"]["stage_id"])

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(trace_data, f, separators=(',', ':'))
    print(f"Aggregated {len(trace_data['traceEvents'])} events to {output_path}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Aggregate all *.timeline files into a single JSON')
    parser.add_argument('--output', required=True, help='Output JSON file path')
    parser.add_argument('--input-pattern', default='*timeline',
                        help='Glob pattern of the timeline files to aggregate')
    args = parser.parse_args()
    aggregate_timeline_files(args.output, args.input_pattern)
of file diff --git a/systrace/hack/format.sh b/systrace/hack/format.sh new file mode 100644 index 0000000000000000000000000000000000000000..9a05c76ccf81b96a6978d6beb3f22b7853bee2eb --- /dev/null +++ b/systrace/hack/format.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +function install_clang_format() { + if ! command -v clang-format &> /dev/null; then + echo "Installing clang-format..." + if command -v apt &> /dev/null; then + sudo apt install -y clang-format + elif command -v yum &> /dev/null; then + sudo yum install -y clang-format + else + echo "Error: Cannot install clang-format (unsupported package manager)." + exit 1 + fi + else + echo "clang-format is already installed." + fi +} + +function setup_clang_format() { + local clang_format_file=".clang-format" + if [ ! -f "$clang_format_file" ]; then + echo "Creating .clang-format with 4-space indentation and Allman braces..." + cat > "$clang_format_file" < + +namespace systrace +{ +namespace constant +{ + +struct TorchTraceConstant +{ + public: + static constexpr int DEFAULT_TRACE_COUNT = 1000; + static constexpr std::string_view DEFAULT_TRACE_DUMP_PATH = + SYS_TRACE_ROOT_DIR "timeline"; +}; + +} // namespace constant +} // namespace systrace \ No newline at end of file diff --git a/systrace/include/common/logging.cc b/systrace/include/common/logging.cc new file mode 100644 index 0000000000000000000000000000000000000000..f7dec9774ee15a0234ee7893ef06571323086956 --- /dev/null +++ b/systrace/include/common/logging.cc @@ -0,0 +1,6 @@ +#include "logging.h" + +namespace systrace +{ +void setLoggingPath() { return; } +} // namespace systrace \ No newline at end of file diff --git a/systrace/include/common/logging.h b/systrace/include/common/logging.h new file mode 100644 index 0000000000000000000000000000000000000000..dc7ef35acc43f2959a3ffd6736f2d1cf094c7103 --- /dev/null +++ b/systrace/include/common/logging.h @@ -0,0 +1,28 @@ +#pragma once + +enum LogLevel +{ + INFO, + WARNING, + ERROR, + FATAL +}; + +#define LOG(level) \ + if 
(level == INFO) \ + std::cerr << "[INFO] "; \ + else if (level == WARNING) \ + std::cerr << "[WARNING] "; \ + else if (level == ERROR) \ + std::cerr << "[ERROR] "; \ + else if (level == FATAL) \ + std::cerr << "[FATAL] "; \ + std::cerr + +#define STLOG(level) \ + LOG(level) << ::systrace::util::config::GlobalConfig::Instance().rank_str + +namespace systrace +{ +void setLoggingPath(); +} \ No newline at end of file diff --git a/systrace/include/common/macro.h b/systrace/include/common/macro.h new file mode 100644 index 0000000000000000000000000000000000000000..56d60ab48fd16dd4ef02c9339c3509a9c2df8bb4 --- /dev/null +++ b/systrace/include/common/macro.h @@ -0,0 +1,17 @@ +#pragma once +#define EXPOSE_API __attribute__((visibility("default"))) + +#define SETUP_SYMBOL_FOR_LOAD_LIBRARY(handle, symbol, func_ptr, func_type, \ + msg) \ + do \ + { \ + func_ptr = (func_type)dlsym(handle, symbol); \ + const char *dlsym_error = dlerror(); \ + if (dlsym_error) \ + { \ + STLOG(WARNING) << "Load fn `" << symbol << "` error in " << msg \ + << dlsym_error; \ + is_usable_ = false; \ + return; \ + } \ + } while (0) diff --git a/systrace/include/common/shared_constants.h b/systrace/include/common/shared_constants.h new file mode 100644 index 0000000000000000000000000000000000000000..d4408e4c5467ad41c68169dc3c41092d2662d618 --- /dev/null +++ b/systrace/include/common/shared_constants.h @@ -0,0 +1,12 @@ +#ifdef __cplusplus +extern "C" +{ +#endif + + extern int global_stage_id; + extern int global_stage_type; +#define SYS_TRACE_ROOT_DIR "/home/sysTrace/" + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/systrace/include/common/util.cc b/systrace/include/common/util.cc new file mode 100644 index 0000000000000000000000000000000000000000..a526d0c4f96eb4f333976f2eee53d90e28512918 --- /dev/null +++ b/systrace/include/common/util.cc @@ -0,0 +1,253 @@ +#include "util.h" +#include "constant.h" +#include +#include +#include +#include +#include +#include +#include +#include + 
+namespace systrace +{ +namespace util +{ + +namespace env +{ +std::string_view EnvVarRegistry::DEFAULT_VALUE_STRING = "NONE"; +int EnvVarRegistry::DEFAULT_VALUE_INT = 0; +bool EnvVarRegistry::DEFAULT_VALUE_BOOL = false; +} // namespace env +namespace fs_utils +{ + +int CreateDirectoryIfNotExists(const std::string &path) +{ + std::filesystem::path d_path(path); + try + { + if (!std::filesystem::exists(d_path)) + { + std::filesystem::create_directories(d_path); + } + if (!std::filesystem::is_directory(d_path)) + { + LOG(ERROR) << "Path exists but is not a directory: " << path; + return 1; + } + } + catch (const std::filesystem::filesystem_error &e) + { + LOG(ERROR) << "Failed to create directory " << path << ": " << e.what(); + return 1; + } + return 0; +} + +std::string GenerateClusterUniqueFilename(const std::string &suffix) +{ + try + { + char hostname[128]; + gethostname(hostname, sizeof(hostname)); + std::ostringstream oss; + oss << hostname << "--" << std::setw(5) << std::setfill('0') + << config::GlobalConfig::Instance().rank << suffix; + return oss.str(); + } + catch (const std::exception &e) + { + LOG(ERROR) << "Filename generation failed: " << e.what(); + return "error_" + std::to_string(std::time(nullptr)) + suffix; + } +} + +} // namespace fs_utils + +namespace config +{ + +class DeviceManager +{ + public: + static constexpr uint64_t MAX_DEVICES = 16; + static constexpr const char *DEVICE_PATH_PREFIX = "/dev/davinci"; + + static std::vector DetectAvailableDevices() + { + std::vector available_devices; + available_devices.reserve(MAX_DEVICES); + + for (uint64_t device_index = 0; device_index < MAX_DEVICES; + ++device_index) + { + if (IsDevicePresent(device_index)) + { + available_devices.push_back(device_index); + if (config::GlobalConfig::Instance().local_rank == 0) + { + LOG(INFO) + << "Found device: " << GetDevicePath(device_index); + } + } + } + + std::sort(available_devices.begin(), available_devices.end()); + return available_devices; + } + + 
/**
 * Check whether `name` is acceptable as an environment variable name.
 *
 * A valid name is non-empty, starts with a letter, and contains only
 * letters, digits and underscores.
 *
 * @param name Candidate variable name.
 * @return true if the name satisfies the rules above, false otherwise.
 */
bool IsValidEnvironmentVariableName(const std::string &name)
{
    // The <cctype> classifiers have undefined behaviour for negative
    // arguments, so every char is widened through unsigned char first.
    if (name.empty() || !isalpha(static_cast<unsigned char>(name[0])))
    {
        return false;
    }

    for (char c : name)
    {
        const unsigned char uc = static_cast<unsigned char>(c);
        if (!isalnum(uc) && uc != '_')
        {
            return false;
        }
    }
    return true;
}
(!IsValidEnvironmentVariableName("ENV_ARGO_WORKFLOW_NAME")) + { + throw std::invalid_argument( + "Invalid env var name: ENV_ARGO_WORKFLOW_NAME"); + } + REGISTER_ENVIRONMENT_VARIABLE( + "ENV_ARGO_WORKFLOW_NAME", + env::EnvVarRegistry::DEFAULT_VALUE_STRING); + + if (!IsValidEnvironmentVariableName("SYSTRACE_SYMS_FILE")) + { + throw std::invalid_argument( + "Invalid env var name: SYSTRACE_SYMS_FILE"); + } + REGISTER_ENVIRONMENT_VARIABLE( + "SYSTRACE_SYMS_FILE", env::EnvVarRegistry::DEFAULT_VALUE_STRING); + + if (!IsValidEnvironmentVariableName("SYSTRACE_LOGGING_DIR")) + { + throw std::invalid_argument( + "Invalid env var name: SYSTRACE_LOGGING_DIR"); + } + REGISTER_ENVIRONMENT_VARIABLE( + "SYSTRACE_LOGGING_DIR", env::EnvVarRegistry::DEFAULT_VALUE_STRING); + + if (!IsValidEnvironmentVariableName("SYSTRACE_HOST_TRACING_FUNC")) + { + throw std::invalid_argument( + "Invalid env var name: SYSTRACE_HOST_TRACING_FUNC"); + } + REGISTER_ENVIRONMENT_VARIABLE( + "SYSTRACE_HOST_TRACING_FUNC", + env::EnvVarRegistry::DEFAULT_VALUE_STRING); + + REGISTER_ENVIRONMENT_VARIABLE("RANK", 0); + REGISTER_ENVIRONMENT_VARIABLE("LOCAL_RANK", 0); + REGISTER_ENVIRONMENT_VARIABLE("LOCAL_WORLD_SIZE", 1); + REGISTER_ENVIRONMENT_VARIABLE("WORLD_SIZE", 1); + REGISTER_ENVIRONMENT_VARIABLE("SYSTRACE_LOGGING_APPEND", false); + } + catch (const std::exception &e) + { + LOG(ERROR) << "Environment variable registration failed: " << e.what(); + throw; + } +} + +} // namespace environment + +void InitializeSystemUtilities() +{ + environment::RegisterRequiredEnvironmentVariables(); + config::InitializeGlobalConfiguration(); +} + +} // namespace util +} // namespace systrace \ No newline at end of file diff --git a/systrace/include/common/util.h b/systrace/include/common/util.h new file mode 100644 index 0000000000000000000000000000000000000000..67c077af263cb8e86b886f0b03f3f303903429f4 --- /dev/null +++ b/systrace/include/common/util.h @@ -0,0 +1,290 @@ +#pragma once + +#include "logging.h" +#include +#include 
/**
 * Mutex-protected free list of heap-allocated `T` objects.
 *
 * Objects handed back through returnObject() are owned by the pool and are
 * deleted by clear() or by the destructor. Objects obtained from
 * getObject() are owned by the caller until they are returned.
 *
 * NOTE(review): the template parameter lists were reconstructed from usage
 * (`T *`, `Init ? new T() : nullptr`); the `= true` default on `Init` is an
 * assumption and only widens what callers may write.
 */
template <typename T> class TimerPool
{
  public:
    TimerPool() = default;
    TimerPool(const TimerPool &) = delete;
    TimerPool &operator=(const TimerPool &) = delete;

    /**
     * Take the oldest pooled object, if any.
     *
     * @tparam Init When the pool is empty: true allocates a fresh
     *              value-initialised `T`, false yields nullptr.
     * @return A pooled or newly allocated object, or nullptr.
     */
    template <bool Init = true> T *getObject()
    {
        {
            std::lock_guard<std::mutex> lock(mutex_);
            if (!pool_.empty())
            {
                T *obj = pool_.front();
                pool_.pop_front();
                return obj;
            }
        }

        // Allocate outside the lock; `new T()` is only instantiated when
        // requested, so Init=false also works for types that cannot be
        // default-constructed.
        if constexpr (Init)
        {
            return new T();
        }
        else
        {
            return nullptr;
        }
    }

    /**
     * Hand an object back to the pool.
     *
     * @param obj  Object to pool; nullptr is ignored.
     * @param size Optional out-parameter: receives the pool size after the
     *             insertion, or 0 when `obj` is nullptr.
     */
    void returnObject(T *obj, int *size)
    {
        if (!obj)
        {
            if (size)
                *size = 0;
            return;
        }

        std::lock_guard<std::mutex> lock(mutex_);
        pool_.push_back(obj);
        if (size)
            *size = static_cast<int>(pool_.size());
    }

    /** Delete every pooled object and empty the pool. */
    void clear()
    {
        std::lock_guard<std::mutex> lock(mutex_);
        for (T *obj : pool_)
        {
            delete obj;
        }
        pool_.clear();
    }

    ~TimerPool() { clear(); }

  private:
    std::deque<T *> pool_; // objects currently owned by the pool (FIFO)
    std::mutex mutex_;     // guards pool_
};
convert_to_variant(const T &s) + -> std::enable_if_t, VarType> + { + return std::string(s); + } + + template + static inline auto convert_to_variant(const T &val) + -> std::enable_if_t, VarType> + { + return val; + } + + private: + template static constexpr bool is_supported_type() + { + return std::is_same_v || std::is_same_v || + std::is_same_v; + } + + static std::string toLower(const std::string &str) + { + std::string lower; + lower.reserve(str.size()); + std::transform(str.begin(), str.end(), std::back_inserter(lower), + [](unsigned char c) { return std::tolower(c); }); + return lower; + } + + // 值解析器 + template static T parseEnvValue(const char *env) + { + if constexpr (std::is_same_v) + { + try + { + return std::stoi(env); + } + catch (...) + { + return DEFAULT_VALUE_INT; + } + } + else if constexpr (std::is_same_v) + { + std::string lower = toLower(env); + if (lower == "true" || lower == "1") + return true; + if (lower == "false" || lower == "0") + return false; + return std::stoi(env) != 0; + } + else if constexpr (std::is_same_v) + { + return env; + } + } + + // Get value from real environment + template + static T getEnvInner(const std::string &env_name, bool *set) + { + const char *env = std::getenv(env_name.c_str()); + if (!env) + { + *set = false; + return {}; + } + + *set = true; + return parseEnvValue(env); + } + + // Default values for fallback + template static T getDefault() + { + if constexpr (std::is_same_v) + { + return DEFAULT_VALUE_INT; + } + else if constexpr (std::is_same_v) + { + return DEFAULT_VALUE_BOOL; + } + else if constexpr (std::is_same_v) + { + return std::string(DEFAULT_VALUE_STRING); + } + } + + static inline std::unordered_map &GetRegistryManager() + { + static std::unordered_map registry_manager; + return registry_manager; + } + + static std::string VariantToString(const VarType &var) + { + return std::visit( + [](const auto &value) + { + std::stringstream ss; + ss << value; + return ss.str(); + }, + var); + } +}; + +#define 
REGISTER_ENVIRONMENT_VARIABLE(name, value) \ + ::systrace::util::env::EnvVarRegistry::RegisterEnv( \ + name, \ + ::systrace::util::env::EnvVarRegistry::convert_to_variant(value)) + +void REGISTER_ENV(); + +} // namespace env +void InitializeSystemUtilities(); +} // namespace util +} // namespace systrace \ No newline at end of file diff --git a/systrace/protos/CMakeLists.txt b/systrace/protos/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..56eb91f320f796bbab740bb67c1c6aeb64f84647 --- /dev/null +++ b/systrace/protos/CMakeLists.txt @@ -0,0 +1,17 @@ +project(general_pb2) + +set(PB_FILES systrace.pb.cc systrace.pb-c.c) + +add_library(${PROJECT_NAME} STATIC ${PB_FILES}) + +include_directories(${PROJECT_SOURCE_DIR} + ${GOOGLE_PROTOBUF_DIR}/include +) + +link_directories(${GOOGLE_PROTOBUF_DIR}/lib/) + +target_link_libraries(${PROJECT_NAME} + protobuf + protobuf-c +) + diff --git a/systrace/protos/systrace.proto b/systrace/protos/systrace.proto new file mode 100644 index 0000000000000000000000000000000000000000..aa0e8f71b85f36be112326aff86ada54bbd19bb4 --- /dev/null +++ b/systrace/protos/systrace.proto @@ -0,0 +1,62 @@ +syntax = "proto3"; + +message StackFrame { + uint64 address = 1; + string so_name = 2; +} + +message MemAllocEntry { + uint64 alloc_ptr = 1; + uint32 stage_id = 2; + StageType stage_type = 3; + uint64 mem_size = 4; + repeated StackFrame stack_frames = 5; +} + +message MemFreeEntry { + uint64 alloc_ptr = 1; + uint32 stage_id = 2; + StageType stage_type = 3; +} + +message ProcMem { + uint32 pid = 1; + repeated MemAllocEntry mem_alloc_stacks = 2; + repeated MemFreeEntry mem_free_stacks = 3; +} + +enum StageType { + STAGE_UNKNOWN = 0; + STAGE_DATALOADER = 1; + STAGE_FORWARD = 2; + STAGE_BACKWARD = 3; + STAGE_SYNCHRONIZATION = 4; + STAGE_GC = 5; +} + +message GcDebugData { + uint32 collected = 1; + uint32 uncollectable = 2; +} + +message PytorchStage { + uint32 stage_id = 1; + string stage_type = 2; + uint64 start_us = 3; + 
uint64 end_us = 4; + repeated string stack_frames = 5; + oneof debug_data { + GcDebugData gc_debug = 6; + } +} + +message Pytorch { + repeated PytorchStage pytorch_stages = 1; + uint32 rank = 2; + uint32 step_id = 3; + string comm = 4; //任务名 +} + +message Mem { + repeated ProcMem proc_mem = 1; +} \ No newline at end of file diff --git a/systrace/src/ascend/hook.cc b/systrace/src/ascend/hook.cc new file mode 100644 index 0000000000000000000000000000000000000000..c1681285b5a96d85dcc0b3a2191f8611e7a66224 --- /dev/null +++ b/systrace/src/ascend/hook.cc @@ -0,0 +1,74 @@ +#include "hook.h" +#include "../src/trace/systrace_manager.h" +#include +#include +#include + +#ifdef __cplusplus +extern "C" +{ +#endif + + static void *load_symbol(const char *func_name) + { + if (!g_hal_lib) + { + g_hal_lib = dlopen("libascendcl.so", RTLD_LAZY); + if (!g_hal_lib) + { + fprintf(stderr, "[Hook] Failed to dlopen libascendcl.so: %s\n", + dlerror()); + return nullptr; + } + } + + void *func = dlsym(g_hal_lib, func_name); + if (!func) + { + fprintf(stderr, "[Hook] Failed to dlsym %s: %s\n", func_name, + dlerror()); + } + else + { + std::cout << "[Hook] Successfully hooked " << func_name + << std::endl; + } + return func; + } + +#define HOOKED_FUNCTION(func_ptr, func_name, ...) 
\ + if (!func_ptr) \ + { \ + func_ptr = (decltype(func_ptr))load_symbol(func_name); \ + if (!func_ptr) \ + return -1; \ + } \ + ::systrace::SysTrace::getInstance(); \ + return func_ptr(__VA_ARGS__); + + EXPOSE_API aclError aclInit(const char *configPath) + { + HOOKED_FUNCTION(orig_aclInit, "aclInit", configPath); + } + + EXPOSE_API aclError aclrtMapMem(void *virPtr, size_t size, size_t offset, + aclrtDrvMemHandle handle, uint64_t flags) + { + HOOKED_FUNCTION(orig_aclrtMapMem, "aclrtMapMem", virPtr, size, offset, + handle, flags); + } + + EXPOSE_API aclError aclrtLaunchKernel(aclrtFuncHandle func, int workDim, + void **workGroup, + size_t *localWorkSize, + aclrtStream stream, void *event, + void *config) + { + HOOKED_FUNCTION(orig_aclrtLaunchKernel, "aclrtLaunchKernel", func, + workDim, workGroup, localWorkSize, stream, event, + config); + } + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/systrace/src/ascend/hook.h b/systrace/src/ascend/hook.h new file mode 100644 index 0000000000000000000000000000000000000000..adbe0a03e02996f1437024ba9ea252a854659615 --- /dev/null +++ b/systrace/src/ascend/hook.h @@ -0,0 +1,41 @@ +#pragma once +#include "../../include/common/macro.h" +#include +#include +#include + +#ifdef __cplusplus +extern "C" +{ +#endif + typedef int aclError; + typedef void *aclrtStream; + typedef void *aclrtFuncHandle; + typedef void *aclrtDrvMemHandle; + + typedef aclError (*aclInitFn)(const char *); + typedef aclError (*aclrtMapMemFn)(void *, size_t, size_t, aclrtDrvMemHandle, + uint64_t); + typedef aclError (*aclrtLaunchKernelFn)(aclrtFuncHandle, int, void **, + size_t *, aclrtStream, void *, + void *); + + extern void *ascend_hal_handle; + extern aclInitFn orig_aclInit; + extern aclrtMapMemFn orig_aclrtMapMem; + extern aclrtLaunchKernelFn orig_aclrtLaunchKernel; + + aclError aclInit(const char *configPath); + aclError aclrtMapMem(void *virPtr, size_t size, size_t offset, + aclrtDrvMemHandle handle, uint64_t flags); + aclError 
aclrtLaunchKernel(aclrtFuncHandle func, int workDim, + void **workGroup, size_t *localWorkSize, + aclrtStream stream, void *event, void *config); + + static void *g_hal_lib = nullptr; + aclInitFn orig_aclInit = nullptr; + aclrtMapMemFn orig_aclrtMapMem = nullptr; + aclrtLaunchKernelFn orig_aclrtLaunchKernel = nullptr; +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/systrace/src/cann/cann_hook.c b/systrace/src/cann/cann_hook.c new file mode 100644 index 0000000000000000000000000000000000000000..a1bd27d6f7756c9754df4e24e1f6ba384be2cd21 --- /dev/null +++ b/systrace/src/cann/cann_hook.c @@ -0,0 +1,532 @@ +#define _GNU_SOURCE +#include "../../include/common/shared_constants.h" +#include "../../protos/systrace.pb-c.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(__aarch64__) +#include "../../thirdparty/aarch64/libunwind/libunwind.h" +#elif defined(__x86_64__) +#include "../../thirdparty/x86_64/libunwind/libunwind.h" +#else +#error "Unsupported architecture - only aarch64 and x86_64 are supported" +#endif + +// export LD_PRELOAD=/home/MindSpeed-LLM-1.0.RC3/libascend_hal_jack.so +// cd /home/hbdir/mspti_test-megatron +// conda activate mspti10 +// python -m torch.distributed.launch --nproc_per_node=8 nqq_train_fsdp.py +// protoc --c_out=. 
tmp.proto + +// drvError_t halMemAlloc(void **pp, unsigned long long size, unsigned long long +// flag); drvError_t halMemFree(void *pp); drvError_t +// halMemCreate(drv_mem_handle_t **handle, size_t size, const struct +// drv_mem_prop *prop, uint64_t flag); drvError_t halMemRelease +// (drv_mem_handle_t *handle); + +#define LOG_INTERVAL_SEC 120 +#define LOG_ITEMS_MIN 1000 + +typedef int drvError_t; + +typedef enum aclrtMemMallocPolicy +{ + ACL_MEM_MALLOC_HUGE_FIRST, + ACL_MEM_MALLOC_HUGE_ONLY, + ACL_MEM_MALLOC_NORMAL_ONLY, + ACL_MEM_MALLOC_HUGE_FIRST_P2P, + ACL_MEM_MALLOC_HUGE_ONLY_P2P, + ACL_MEM_MALLOC_NORMAL_ONLY_P2P, + ACL_MEM_TYPE_LOW_BAND_WIDTH = 0x0100, + ACL_MEM_TYPE_HIGH_BAND_WIDTH = 0x1000, +} aclrtMemMallocPolicy; +typedef drvError_t (*halMemAllocFunc_t)(void **pp, unsigned long long size, + unsigned long long flag); +typedef drvError_t (*halMemFreeFunc_t)(void *pp); +typedef drvError_t (*halMemCreateFunc_t)(void **handle, size_t size, void *prop, + uint64_t flag); +typedef drvError_t (*halMemReleaseFunc_t)(void *handle); + +typedef drvError_t (*aclrtMallocFunc_t)(void **devPtr, size_t size, + aclrtMemMallocPolicy policy); +typedef drvError_t (*aclrtMallocCachedFunc_t)(void **devPtr, size_t size, + aclrtMemMallocPolicy policy); +typedef drvError_t (*aclrtMallocAlign32Func_t)(void **devPtr, size_t size, + aclrtMemMallocPolicy policy); +typedef drvError_t (*aclrtFreeFunc_t)(void *devPtr); + +static halMemAllocFunc_t orig_halMemAlloc = NULL; +static halMemFreeFunc_t orig_halMemFree = NULL; +static halMemCreateFunc_t orig_halMemCreate = NULL; +static halMemReleaseFunc_t orig_halMemRelease = NULL; +static aclrtMallocFunc_t orig_aclrtMalloc = NULL; +static aclrtMallocCachedFunc_t orig_aclrtMallocCached = NULL; +static aclrtMallocAlign32Func_t orig_aclrtMallocAlign32 = NULL; +static aclrtFreeFunc_t orig_aclrtFree = NULL; + +static pthread_key_t thread_data_key; +static pthread_once_t key_once = PTHREAD_ONCE_INIT; +static pthread_mutex_t file_mutex = 
PTHREAD_MUTEX_INITIALIZER; +extern int global_stage_id; +extern int global_stage_type; + +typedef struct +{ + ProcMem *proc_mem; + time_t last_log_time; +} ThreadData; + +static void *load_symbol(void *lib, const char *symbol_name) +{ + void *sym = dlsym(lib, symbol_name); + if (!sym) + { + fprintf(stderr, "Failed to find symbol %s: %s\n", symbol_name, + dlerror()); + } + return sym; +} + +static void free_proc_mem(ProcMem *proc_mem) +{ + if (!proc_mem) + return; + + // 释放分配记录 + for (size_t i = 0; i < proc_mem->n_mem_alloc_stacks; i++) + { + MemAllocEntry *entry = proc_mem->mem_alloc_stacks[i]; + for (size_t j = 0; j < entry->n_stack_frames; j++) + { + free((void *)entry->stack_frames[j]->so_name); + free(entry->stack_frames[j]); + } + free(entry->stack_frames); + free(entry); + } + free(proc_mem->mem_alloc_stacks); + + // 释放释放记录 + for (size_t i = 0; i < proc_mem->n_mem_free_stacks; i++) + { + free(proc_mem->mem_free_stacks[i]); + } + free(proc_mem->mem_free_stacks); + + // 重置计数 + proc_mem->n_mem_alloc_stacks = 0; + proc_mem->mem_alloc_stacks = NULL; + proc_mem->n_mem_free_stacks = 0; + proc_mem->mem_free_stacks = NULL; +} + +static void free_thread_data(void *data) +{ + ThreadData *td = (ThreadData *)data; + if (td && td->proc_mem) + { + free_proc_mem(td->proc_mem); + free(td->proc_mem); + } + free(td); +} + +static inline uint32_t get_current_pid() { return (uint32_t)getpid(); } + +static void make_key() +{ + pthread_key_create(&thread_data_key, free_thread_data); +} + +static ThreadData *get_thread_data() +{ + ThreadData *td; + + pthread_once(&key_once, make_key); + td = pthread_getspecific(thread_data_key); + + if (!td) + { + td = calloc(1, sizeof(ThreadData)); + td->proc_mem = calloc(1, sizeof(ProcMem)); + proc_mem__init(td->proc_mem); + td->proc_mem->pid = get_current_pid(); + td->last_log_time = time(NULL); + pthread_setspecific(thread_data_key, td); + } + + return td; +} + +static const char *get_so_name(uint64_t ip) +{ + Dl_info info; + const char 
*so_name; + if (dladdr((void *)ip, &info)) + { + so_name = strrchr(info.dli_fname, '/'); + return (so_name != NULL) ? so_name + 1 : info.dli_fname; + } + return "unknown"; +} + +static void get_log_filename(time_t current, uint32_t pid, char *buf, + size_t buf_size) +{ + const char *rank_str = getenv("RANK"); + int rank = rank_str ? atoi(rank_str) : 0; + struct tm *tm = localtime(¤t); + + const char *dir_path = SYS_TRACE_ROOT_DIR "cann"; + if (access(dir_path, F_OK) != 0) + { + if (mkdir(dir_path, 0755) != 0 && errno != EEXIST) + { + perror("Failed to create directory"); + snprintf(buf, buf_size, "mem_trace_%04d%02d%02d_%02d_%u_rank%d.pb", + tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, + tm->tm_hour, pid, rank); + return; + } + } + snprintf(buf, buf_size, "%s/mem_trace_%04d%02d%02d_%02d_%u_rank%d.pb", + dir_path, tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, + tm->tm_hour, pid, rank); +} + +static char is_ready_to_write(ThreadData *td, time_t *current) +{ + ProcMem *proc_mem = td->proc_mem; + if (!proc_mem || + (proc_mem->n_mem_alloc_stacks + proc_mem->n_mem_free_stacks == 0)) + { + return 0; + } + + *current = time(NULL); + if (proc_mem->n_mem_alloc_stacks + proc_mem->n_mem_free_stacks < + LOG_ITEMS_MIN) + { + if (*current - td->last_log_time < LOG_INTERVAL_SEC) + { + return 0; + } + } + + return 1; +} + +static void write_protobuf_to_file() +{ + time_t current; + uint8_t *buf; + ThreadData *td = get_thread_data(); + if (!td) + { + return; + } + + if (!is_ready_to_write(td, ¤t)) + { + return; + } + + if (pthread_mutex_trylock(&file_mutex) == 0) + { // pthread_mutex_trylock or pthread_mutex_lock + char filename[256]; + get_log_filename(current, td->proc_mem->pid, filename, + sizeof(filename)); + + size_t len = proc_mem__get_packed_size(td->proc_mem); + buf = malloc(len); + proc_mem__pack(td->proc_mem, buf); + + FILE *fp = fopen(filename, "ab"); + if (fp) + { + fwrite(buf, len, 1, fp); + fclose(fp); + } + + pthread_mutex_unlock(&file_mutex); + } + else + { + 
return; + } + + if (buf) + { + free(buf); + } + + free_proc_mem(td->proc_mem); + td->last_log_time = current; +} + +static void exit_handler(void) { write_protobuf_to_file(); } + +int init_mem_trace() +{ + void *lib = + dlopen("/usr/local/Ascend/ascend-toolkit/latest/lib64/libascendcl.so", + RTLD_LAZY); + if (!lib) + { + fprintf(stderr, "dlopen failed: %s\n", dlerror()); + return -1; + } + + orig_halMemAlloc = (halMemAllocFunc_t)load_symbol(lib, "halMemAlloc"); + orig_halMemFree = (halMemFreeFunc_t)load_symbol(lib, "halMemFree"); + orig_halMemCreate = (halMemCreateFunc_t)load_symbol(lib, "halMemCreate"); + orig_halMemRelease = (halMemReleaseFunc_t)load_symbol(lib, "halMemRelease"); + orig_aclrtMalloc = (aclrtMallocFunc_t)load_symbol(lib, "aclrtMalloc"); + orig_aclrtMallocCached = + (aclrtMallocCachedFunc_t)load_symbol(lib, "aclrtMallocCached"); + orig_aclrtMallocAlign32 = + (aclrtMallocAlign32Func_t)load_symbol(lib, "aclrtMallocAlign32"); + orig_aclrtFree = (aclrtFreeFunc_t)load_symbol(lib, "aclrtFree"); + + if (!orig_halMemAlloc || !orig_halMemFree || !orig_aclrtMalloc || + !orig_aclrtFree || !orig_halMemCreate || !orig_halMemRelease || + !orig_aclrtMallocCached || orig_aclrtMallocAlign32) + { + return -1; + } + + atexit(exit_handler); + + return 0; +} + +unw_word_t get_so_base(unw_word_t addr) +{ + Dl_info info; + if (dladdr((void *)addr, &info) != 0) + { + return (unw_word_t)info.dli_fbase; + } + return 0; +} + +static void collect_stack_frames(MemAllocEntry *entry) +{ + unw_cursor_t cursor; + unw_context_t context; + unw_word_t ip; + int frame_count = 0; + const int max_frames = 32; + + unw_getcontext(&context); + unw_init_local(&cursor, &context); + + entry->stack_frames = calloc(max_frames, sizeof(StackFrame *)); + while (unw_step(&cursor) > 0 && frame_count < max_frames) + { + unw_get_reg(&cursor, UNW_REG_IP, &ip); + + // Get the SO name and base address for this IP + const char *so_name = get_so_name(ip); + unw_word_t so_base = get_so_base(ip); // You'll 
need to implement this + + StackFrame *frame = malloc(sizeof(StackFrame)); + stack_frame__init(frame); + frame->address = + ip - so_base; // Store offset within SO instead of virtual address + frame->so_name = strdup(so_name); + + entry->stack_frames[frame_count] = frame; + entry->n_stack_frames++; + + frame_count++; + } +} + +static void add_mem_alloc_entry(void *pp, size_t size) +{ + ThreadData *td = get_thread_data(); + + MemAllocEntry *entry = malloc(sizeof(MemAllocEntry)); + mem_alloc_entry__init(entry); + entry->alloc_ptr = (uint64_t)pp; + entry->mem_size = size; + entry->stage_id = global_stage_id; + entry->stage_type = global_stage_type; + entry->n_stack_frames = 0; + entry->stack_frames = NULL; + + collect_stack_frames(entry); + + td->proc_mem->n_mem_alloc_stacks++; + td->proc_mem->mem_alloc_stacks = + realloc(td->proc_mem->mem_alloc_stacks, + td->proc_mem->n_mem_alloc_stacks * sizeof(MemAllocEntry *)); + td->proc_mem->mem_alloc_stacks[td->proc_mem->n_mem_alloc_stacks - 1] = + entry; +} + +static void add_mem_free_entry(void *pp) +{ + ThreadData *td = get_thread_data(); + + MemFreeEntry *entry = malloc(sizeof(MemFreeEntry)); + mem_free_entry__init(entry); + entry->alloc_ptr = (uint64_t)pp; + entry->stage_id = global_stage_id; + entry->stage_type = global_stage_type; + + td->proc_mem->n_mem_free_stacks++; + td->proc_mem->mem_free_stacks = + realloc(td->proc_mem->mem_free_stacks, + td->proc_mem->n_mem_free_stacks * sizeof(MemFreeEntry *)); + td->proc_mem->mem_free_stacks[td->proc_mem->n_mem_free_stacks - 1] = entry; +} + +drvError_t halMemAlloc(void **pp, unsigned long long size, + unsigned long long flag) +{ + if (!orig_halMemAlloc) + { + init_mem_trace(); + } + int ret = orig_halMemAlloc(pp, size, flag); + if (ret == 0 && pp && *pp) + { + add_mem_alloc_entry(*pp, size); + } + + write_protobuf_to_file(); + + return ret; +} + +drvError_t halMemFree(void *pp) +{ + if (!orig_halMemFree) + { + init_mem_trace(); + } + int ret = orig_halMemFree(pp); + if (ret == 
0 && pp) + { + add_mem_free_entry(pp); + } + + write_protobuf_to_file(); + + return ret; +} + +drvError_t aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy) +{ + if (!orig_aclrtMalloc) + { + init_mem_trace(); + } + int ret = orig_aclrtMalloc(devPtr, size, policy); + if (ret == 0 && devPtr && *devPtr) + { + add_mem_alloc_entry(*devPtr, size); + } + + write_protobuf_to_file(); + + return ret; +} + +drvError_t aclrtMallocCached(void **devPtr, size_t size, + aclrtMemMallocPolicy policy) +{ + if (!orig_aclrtMallocCached) + { + init_mem_trace(); + } + int ret = orig_aclrtMallocCached(devPtr, size, policy); + if (ret == 0 && devPtr && *devPtr) + { + add_mem_alloc_entry(*devPtr, size); + } + + write_protobuf_to_file(); + + return ret; +} + +drvError_t aclrtMallocAlign32(void **devPtr, size_t size, + aclrtMemMallocPolicy policy) +{ + if (!orig_aclrtMallocAlign32) + { + init_mem_trace(); + } + int ret = orig_aclrtMallocAlign32(devPtr, size, policy); + if (ret == 0 && devPtr && *devPtr) + { + add_mem_alloc_entry(*devPtr, size); + } + + write_protobuf_to_file(); + + return ret; +} + +drvError_t aclrtFree(void *devPtr) +{ + if (!orig_aclrtFree) + { + init_mem_trace(); + } + int ret = orig_aclrtFree(devPtr); + if (ret == 0 && devPtr) + { + add_mem_free_entry(devPtr); + } + + write_protobuf_to_file(); + + return ret; +} + +drvError_t halMemCreate(void **handle, size_t size, void *prop, uint64_t flag) +{ + if (!orig_halMemCreate) + { + init_mem_trace(); + } + int ret = orig_halMemCreate(handle, size, prop, flag); + if (ret == 0 && handle && *handle) + { + add_mem_alloc_entry(*handle, size); + } + + write_protobuf_to_file(); + + return ret; +} + +drvError_t halMemRelease(void *handle) +{ + if (!orig_halMemRelease) + { + init_mem_trace(); + } + + int ret = orig_halMemRelease(handle); + if (ret == 0 && handle) + { + add_mem_free_entry(handle); + } + + write_protobuf_to_file(); + + return ret; +} \ No newline at end of file diff --git 
a/systrace/src/mspti/json_file_writer.h b/systrace/src/mspti/json_file_writer.h new file mode 100644 index 0000000000000000000000000000000000000000..9b19788c2b2ee6f807e2b69a98640a438f977f1a --- /dev/null +++ b/systrace/src/mspti/json_file_writer.h @@ -0,0 +1,189 @@ +#pragma once +#include "../../include/common/shared_constants.h" +#include "../../include/common/util.h" +#include "mspti.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +class MSPTIHcclFileWriter +{ + private: + std::ofstream file; + std::mutex buffermtx; + std::mutex bufferMarkerMtx; + std::mutex threadmtx; + std::atomic opened; + std::unique_ptr> markerActivityBuffer; + std::thread writerThread; + std::condition_variable cv; + std::atomic stop; + Json::Value root = Json::Value(Json::ValueType::arrayValue); + + public: + MSPTIHcclFileWriter(const std::string &filename) + { + // obtain environment variable LOCAL_RANK + // to determine the rank of the process + // and append it to the filename + const char *path = std::getenv("METRIC_PATH"); + std::string savePath = path ? 
path : SYS_TRACE_ROOT_DIR "mspti/"; + if (systrace::util::fs_utils::CreateDirectoryIfNotExists(savePath)) + { + STLOG(ERROR) << "[MSPTI] Failed to create dump directory"; + return; + } + std::string savePathStr = savePath; + if (!savePathStr.empty() && savePathStr.back() != '/') + { + savePathStr += "/"; + } + std::string saveFilename = savePathStr + filename; + std::string filenameWithRank = saveFilename; + this->markerActivityBuffer = + std::make_unique>(); + + const char *localRankCStr = std::getenv("RANK"); + if (localRankCStr == nullptr) + { + localRankCStr = "-1"; + } + std::string localRank = + localRankCStr; // Now safe to construct std::string + auto rank = std::stoi(localRank); + if (saveFilename.length() >= 5 && + saveFilename.substr(saveFilename.length() - 5) == ".json") + { + std::string baseName = + saveFilename.substr(0, saveFilename.length() - 5); + filenameWithRank = baseName + "." + std::to_string(rank) + ".json"; + } + else + { + filenameWithRank = saveFilename + "." + std::to_string(rank); + } + std::cout << "Filename: " << filenameWithRank << std::endl; + this->file.open(filenameWithRank, std::ios::out | std::ios::app); + this->opened.store(true); + this->stop.store(false); + this->run(); + } + + void stopWriter() + { + if (this->file.is_open()) + { + { + std::unique_lock lock(this->threadmtx); + this->stop.store(true); + } + this->cv.notify_all(); + this->hcclActivityFormatToJson(); + if (this->writerThread.joinable()) + { + this->writerThread.join(); + } + this->file.close(); + this->opened.store(false); + } + } + + ~MSPTIHcclFileWriter() { this->stopWriter(); } + + bool fileExists(const std::string &fp) + { + std::ifstream file(fp.c_str()); + return file.good() && file.is_open(); + } + + void bufferMarkerActivity(msptiActivityMarker *activity) + { + std::lock_guard lock(this->bufferMarkerMtx); + this->markerActivityBuffer->push_back(*activity); + } + + void run() + { + // a thread to periodically flush + // the buffer to the file + // watch 
the conditional variable for signal + this->writerThread = std::thread( + [this]() + { + while (!this->stop.load()) + { + std::unique_lock lock(this->threadmtx); + if (this->cv.wait_for(lock, std::chrono::seconds(5)) == + std::cv_status::timeout) + { + this->hcclActivityFormatToJson(); + } + else if (this->stop.load()) + { + break; + }; + } + }); + } + + void hcclActivityFormatToJson() + { + std::lock_guard lock(this->buffermtx); + if (this->file.is_open()) + { + for (auto activity : *this->markerActivityBuffer) + { + Json::Value markerJson; + markerJson["Kind"] = activity.kind; + markerJson["SourceKind"] = activity.sourceKind; + markerJson["Timestamp"] = activity.timestamp; + markerJson["Id"] = activity.id; + markerJson["Flag"] = activity.flag; + Json::Value msptiObjecId; + if (activity.sourceKind == MSPTI_ACTIVITY_SOURCE_KIND_HOST) + { + Json::Value pt; + pt["ProcessId"] = activity.objectId.pt.processId; + pt["ThreadId"] = activity.objectId.pt.threadId; + Json::Value ds; + ds["DeviceId"] = activity.objectId.pt.processId; + ds["StreamId"] = activity.objectId.pt.threadId; + msptiObjecId["Pt"] = pt; + msptiObjecId["Ds"] = ds; + } + else if (activity.sourceKind == + MSPTI_ACTIVITY_SOURCE_KIND_DEVICE) + { + Json::Value ds; + ds["DeviceId"] = activity.objectId.ds.deviceId; + ds["StreamId"] = activity.objectId.ds.streamId; + Json::Value pt; + pt["ProcessId"] = activity.objectId.ds.deviceId; + pt["ThreadId"] = activity.objectId.ds.streamId; + msptiObjecId["Pt"] = pt; + msptiObjecId["Ds"] = ds; + } + markerJson["msptiObjectId"] = msptiObjecId; + markerJson["Name"] = activity.name; + this->root.append(markerJson); + } + if (this->root.size() > 0) + { + Json::StyledWriter writer; + this->file << writer.write(this->root); + this->root.clear(); + } + this->markerActivityBuffer->clear(); + } + else + { + std::cout << "File is not open" << std::endl; + } + } +}; \ No newline at end of file diff --git a/systrace/src/mspti/mspti_tracker.cpp b/systrace/src/mspti/mspti_tracker.cpp 
new file mode 100644 index 0000000000000000000000000000000000000000..d6f1285f3172f482391ab7479def6392ba6563db --- /dev/null +++ b/systrace/src/mspti/mspti_tracker.cpp @@ -0,0 +1,96 @@ +#include "mspti_tracker.hpp" +#include +#include +#include + +constexpr size_t KB = 1 * 1024; +constexpr size_t MB = 1 * 1024 * KB; +constexpr size_t ALIGN_SIZE = 8; + +std::mutex MSPTITracker::mtx; + +inline uint8_t *align_buffer(uint8_t *buffer, size_t align) +{ + return reinterpret_cast( + (reinterpret_cast(buffer) + (align - 1)) & ~(align - 1)); +} + +MSPTITracker::MSPTITracker() +{ + std::cout << "Logging initialized from preloaded library." << std::endl; + hcclFileWriter = + std::make_unique("hccl_activity.json"); + msptiSubscribe(&subscriber, nullptr, nullptr); + msptiActivityRegisterCallbacks(UserBufferRequest, UserBufferComplete); + msptiActivityEnable(MSPTI_ACTIVITY_KIND_MARKER); +} + +MSPTITracker::~MSPTITracker() +{ + msptiActivityFlushAll(1); + msptiActivityDisable(MSPTI_ACTIVITY_KIND_MARKER); + finish(); +} + +MSPTITracker &MSPTITracker::getInstance() +{ + static MSPTITracker instance; + return instance; +} + +void MSPTITracker::finish() +{ + std::cout << "Finishing MSPTI Tracker" << std::endl; + if (hcclFileWriter) + { + hcclFileWriter->stopWriter(); + } +} + +void MSPTITracker::readActivityMarker(msptiActivityMarker *activity) +{ + if (hcclFileWriter) + { + hcclFileWriter->bufferMarkerActivity(activity); + } +} + +void MSPTITracker::UserBufferRequest(uint8_t **buffer, size_t *size, + size_t *maxNumRecords) +{ + auto &instance = getInstance(); + std::lock_guard lock(mtx); + constexpr uint32_t SIZE = (uint32_t)MB * 1; + instance.requestedCount.fetch_add(1); + uint8_t *pBuffer = (uint8_t *)malloc(SIZE + ALIGN_SIZE); + *buffer = align_buffer(pBuffer, ALIGN_SIZE); + *size = MB * 1; + *maxNumRecords = 0; +} + +void MSPTITracker::UserBufferComplete(uint8_t *buffer, size_t size, + size_t validSize) +{ + auto &instance = getInstance(); + if (validSize > 0) + { + msptiActivity 
*pRecord = nullptr; + msptiResult status = MSPTI_SUCCESS; + do + { + std::lock_guard lock(mtx); + status = msptiActivityGetNextRecord(buffer, validSize, &pRecord); + if (status == MSPTI_SUCCESS && + pRecord->kind == MSPTI_ACTIVITY_KIND_MARKER) + { + instance.readActivityMarker( + reinterpret_cast(pRecord)); + } + else if (status == MSPTI_ERROR_MAX_LIMIT_REACHED) + { + break; + } + } while (status == MSPTI_SUCCESS); + } + free(buffer); +} \ No newline at end of file diff --git a/systrace/src/mspti/mspti_tracker.hpp b/systrace/src/mspti/mspti_tracker.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a1c75bcda7e0aead480b0d09e5b43cf42f012775 --- /dev/null +++ b/systrace/src/mspti/mspti_tracker.hpp @@ -0,0 +1,33 @@ +#include "json_file_writer.h" +#include "mspti.h" +#include +#include +#include + +class MSPTITracker +{ + private: + static std::mutex mtx; + + msptiSubscriberHandle subscriber; + std::unique_ptr hcclFileWriter; + std::atomic requestedCount{0}; + + MSPTITracker(); + ~MSPTITracker(); + + public: + MSPTITracker(const MSPTITracker &) = delete; + MSPTITracker &operator=(const MSPTITracker &) = delete; + + static MSPTITracker &getInstance(); + + msptiSubscriberHandle *getSubscriber() { return &subscriber; } + void finish(); + void readActivityMarker(msptiActivityMarker *activity); + + static void UserBufferRequest(uint8_t **buffer, size_t *size, + size_t *maxNumRecords); + static void UserBufferComplete(uint8_t *buffer, size_t size, + size_t validSize); +}; \ No newline at end of file diff --git a/systrace/src/trace/CMakeLists.txt b/systrace/src/trace/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..8eff8f276da7450360da0a83006a0515a6ff81ea --- /dev/null +++ b/systrace/src/trace/CMakeLists.txt @@ -0,0 +1,13 @@ +add_definitions(-DSYSTRACE_EXPORTS -D_GLIBCXX_USE_CXX11_ABI=1) + +set(PYTHON_TRACING_SOURCES + python/pytorch_tracing_loader.cc + python/pytorch_tracing_manager.cc +) + +set_source_files_properties( 
+ systrace_manager.cc + library_loader.cc + ${PYTHON_TRACING_SOURCES} + PROPERTIES COMPILE_FLAGS "-fPIC -Wall -Wextra" +) \ No newline at end of file diff --git a/systrace/src/trace/library_loader.cc b/systrace/src/trace/library_loader.cc new file mode 100644 index 0000000000000000000000000000000000000000..a74bb376f9feb7f4922564a3380acc9dfcbc2801 --- /dev/null +++ b/systrace/src/trace/library_loader.cc @@ -0,0 +1,46 @@ +#include "library_loader.h" +#include "../../include/common/logging.h" +#include + +namespace systrace +{ + +DynamicLibraryLoader::DynamicLibraryLoader(const std::string &library_path) + : library_handle_(nullptr), is_usable_(false), library_path_(library_path) +{ + LoadDynamicLibrary(); +} + +DynamicLibraryLoader::~DynamicLibraryLoader() +{ + if (library_handle_) + { + dlclose(library_handle_); + library_handle_ = nullptr; + } +} + +void DynamicLibraryLoader::LoadDynamicLibrary() +{ + if (library_handle_) + { + STLOG(WARNING) << "Library already loaded: " << library_path_; + return; + } + + dlerror(); + + library_handle_ = dlopen(library_path_.c_str(), RTLD_LAZY); + if (!library_handle_) + { + const char *error_message = dlerror(); + STLOG(WARNING) << "Failed to load library: " + << (error_message ? 
/**
 * Owns a shared library opened from a fixed path.
 *
 * The destructor closes the handle, so an instance is the sole owner of it.
 * Copying is disabled: a copy would share the raw handle and close it a
 * second time when destroyed.
 */
class DynamicLibraryLoader
{
  protected:
    void *library_handle_;           // handle from dlopen, nullptr if not loaded
    bool is_usable_;                 // true once the library (and symbols) loaded
    const std::string library_path_; // path passed to the constructor

    // Open library_path_ and update library_handle_ / is_usable_.
    void LoadDynamicLibrary();

  public:
    explicit DynamicLibraryLoader(const std::string &library_path);
    virtual ~DynamicLibraryLoader();

    // Single-owner resource: no copies (moves are already suppressed by the
    // user-declared destructor).
    DynamicLibraryLoader(const DynamicLibraryLoader &) = delete;
    DynamicLibraryLoader &operator=(const DynamicLibraryLoader &) = delete;

    // True when a handle is held and the library was marked usable.
    bool IsLibraryLoaded() const
    {
        return library_handle_ != nullptr && is_usable_;
    }

    // Raw handle for symbol lookup; nullptr when loading failed.
    void *GetLibraryHandle() const { return library_handle_; }
};
strcmp(function_name, "torch_npu.npu@Stream@wait_event") == 0 || + strcmp(function_name, "torch_npu.npu@Stream@wait_stream") == 0) + { + return SYNCHRONIZATION; + } + if (strcmp(function_name, "torch@autograd@backward") == 0 || + strcmp(function_name, "torch@autograd@grad") == 0) + { + return BACKWARD; + } + if (strcmp(function_name, + "megatron.core.pipeline_parallel@schedules@forward_step") == 0) + { + return FORWARD; + } + if (strcmp(function_name, + "megatron.core.pipeline_parallel@schedules@backward_step") == 0) + { + return BACKWARD; + } + return UNKNOWN; +} + +static int register_tracing_function(const char *name, int index, char **errors) +{ + int64_t code_address; + int is_native; + int ret = + GetFuncAddressByPython(name, errors + index, &code_address, &is_native); + + if (ret) + { + printf("register function `%s` error\n", name); + return ret; + } + + printf("register function `%s` at address %ld\n", name, code_address); + addTracingData(index, name); + + TracingFunction *traced_function = + (TracingFunction *)malloc(sizeof(TracingFunction)); + traced_function->tag_name = index; + traced_function->function_name = strdup(name); + traced_function->py_code_address = code_address; + traced_function->is_native = is_native; + + HASH_ADD(hh, pytorch_tracing_func_map, py_code_address, sizeof(int64_t), + traced_function); + + return 0; +} + +static void set_profiler_for_all_threads() +{ + PyEval_SetProfile(profiler, NULL); + + PyThreadState *tstate = PyThreadState_Get(); + PyThreadState *thread_array[PY_TRACING_MAX_THREADS]; + memset(thread_array, 0, sizeof(thread_array)); + + int thread_count = 0; + while (tstate != NULL && thread_count < PY_TRACING_MAX_THREADS) + { + thread_array[thread_count++] = tstate; + printf("Set profiler for thread %ld\n", tstate->thread_id); + tstate = PyThreadState_Next(tstate); + } + + for (int i = 0; i < thread_count; i++) + { + PyThreadState_Swap(thread_array[i]); + PyEval_SetProfile(profiler, NULL); + } + + 
PyThreadState_Swap(thread_array[0]); +} + +#if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 11 +static void capture_stack(PyFrameObject *frame, PyTorchTracingData *trace_entry) +{ + PyGILState_STATE gstate = PyGILState_Ensure(); + int depth = 0; + while (frame && depth < MAX_STACK_DEPTH) + { + PyCodeObject *code = PyFrame_GetCode(frame); + if (!code) + { + break; + } + + const char *name = PyUnicode_AsUTF8(code->co_name); + const char *file = PyUnicode_AsUTF8(code->co_filename); + int line = PyFrame_GetLineNumber(frame); + + snprintf(trace_entry->stack_info[depth], 256, "%s@%s:%d", + name ? name : "unknown", file ? file : "unknown", line); + + PyFrameObject *next_frame = PyFrame_GetBack(frame); + Py_DECREF(code); + frame = next_frame; + + depth++; + } + trace_entry->stack_depth = depth; + PyGILState_Release(gstate); +} + +uint64_t getCodeOfFrame(PyFrameObject *frame) +{ + return (int64_t)(uintptr_t)PyFrame_GetCode(frame); +} +#else +static void capture_stack(PyFrameObject *frame, PyTorchTracingData *trace_entry) +{ + PyGILState_STATE gstate = PyGILState_Ensure(); + int depth = 0; + while (frame && depth < MAX_STACK_DEPTH) + { + snprintf(trace_entry->stack_info[depth], 256, "%s@%s:%d", + PyUnicode_AsUTF8(frame->f_code->co_name), + PyUnicode_AsUTF8(frame->f_code->co_filename), + PyFrame_GetLineNumber(frame)); + frame = frame->f_back; + depth++; + } + trace_entry->stack_depth = depth; + PyGILState_Release(gstate); +} + +uint64_t getCodeOfFrame(PyFrameObject *frame) +{ + return (int64_t)(uintptr_t)(frame->f_code); +} + +#endif + +uint64_t getMsTime() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return (uint64_t)tv.tv_sec * 1000000 + (uint64_t)tv.tv_usec; +} + +static void ensure_python_initialized() +{ + if (!Py_IsInitialized()) + { + Py_Initialize(); + } +} + +TracingFunction *isTracedPyTorchFunction(PyFrameObject *frame) +{ + uint64_t code_address = getCodeOfFrame(frame); + TracingFunction *traced_function = NULL; + HASH_FIND(hh, pytorch_tracing_func_map, 
&code_address, sizeof(int64_t), + traced_function); + return traced_function; +} + +static int profiler(PyObject *obj, PyFrameObject *frame, int what, + PyObject *arg) +{ + TracingFunction *func_data = isTracedPyTorchFunction(frame); + if (!func_data) + return 0; + int tag_name = func_data->tag_name; + int stage_type = determine_stage_type(func_data->function_name); + if ((what == PyTrace_CALL) && start_tracing) + { + pthread_mutex_lock(&mutex); + TracingData *tracing_data = receiveTracingData(tag_name); + PyTorchTracingDataArray *curr_data = tracing_data->curr_data; + if (curr_data->cur == PY_TRACING_BUFFER_SIZE) + { + systrace_return_pytorch_tracing_data_array( + curr_data, PY_TRACING_READY_POOL, tag_name); + tracing_data->curr_data = + systrace_get_empty_pytorch_tracing_data_array(tag_name); + curr_data = tracing_data->curr_data; + } + curr_data->data[curr_data->cur].start = getMsTime(); + if (stage_type == DATALOADER) + { + global_stage_id++; + } + curr_data->data[curr_data->cur].stage_id = global_stage_id; + curr_data->data[curr_data->cur].stage_type = stage_type; + global_stage_type = stage_type; + capture_stack(frame, &curr_data->data[curr_data->cur]); + + pthread_mutex_unlock(&mutex); + } + else if (what == PyTrace_RETURN) + { + pthread_mutex_lock(&mutex); + TracingData *tracing_data = receiveTracingData(tag_name); + if (start_tracing) + { + PyTorchTracingDataArray *curr_data = tracing_data->curr_data; + curr_data->data[curr_data->cur].count = tracing_data->count; + curr_data->data[curr_data->cur++].end = getMsTime(); + } + tracing_data->count++; + pthread_mutex_unlock(&mutex); + } + return 0; +} + +static int set_error_message(char **error_message, const char *format, ...) 
/*
 * Split `code` on '@' into at most three heap-allocated tokens.
 *
 * On success returns 1 and stores a malloc'ed array in *tokens and its
 * length in *token_count; the caller frees each token and the array.
 * On failure (NULL input or out of memory) returns 0 with *tokens == NULL
 * and *token_count == 0, and nothing is left allocated.
 */
static int parse_input_string(const char *code, char ***tokens, int *token_count)
{
    *tokens = NULL;
    *token_count = 0;
    if (!code)
    {
        return 0;
    }

    char *copy = strdup(code); /* strtok_r modifies its input */
    if (!copy)
    {
        return 0;
    }

    char **parts = malloc(3 * sizeof(char *));
    if (!parts)
    {
        free(copy);
        return 0;
    }

    int count = 0;
    char *saveptr = NULL;
    for (char *token = strtok_r(copy, "@", &saveptr); token && count < 3;
         token = strtok_r(NULL, "@", &saveptr))
    {
        parts[count] = strdup(token);
        if (!parts[count])
        {
            /* Never hand back an array containing a NULL token: callers
             * format the tokens with %s. */
            while (count > 0)
            {
                free(parts[--count]);
            }
            free(parts);
            free(copy);
            return 0;
        }
        count++;
    }

    free(copy);
    *tokens = parts;
    *token_count = count;
    return 1;
}

/*
 * Build the Python snippet that resolves the object named by `code` and
 * leaves its `address` and `is_native` in the executing namespace.
 *
 * Three tokens: "from t0 import t1 as mm" then getattr(mm, t2).
 * Two tokens:   "from t0 import t1 as obj".
 * Otherwise:    look `code` up in globals().
 *
 * Returns a malloc'ed string the caller must free, or NULL when formatting
 * fails.
 */
static char *build_python_code(const char *code, char **tokens, int token_count)
{
    const char *template =
        "try:\n"
        "    obj = None\n"
        "%s\n"
        "    while hasattr(obj, '__wrapped__'):\n"
        "        obj = getattr(obj, '__wrapped__')\n"
        "    if hasattr(obj, '__code__'):\n"
        "        address = id(obj.__code__)\n"
        "        is_native = 0\n"
        "    else:\n"
        "        address = id(obj)\n"
        "        is_native = 1\n"
        "except Exception as e:\n"
        "    raise\n";

    char *import_part = NULL;
    int written;
    if (token_count == 3)
    {
        written = asprintf(&import_part,
                           "    from %s import %s as mm\n"
                           "    obj = getattr(mm, '%s')",
                           tokens[0], tokens[1], tokens[2]);
    }
    else if (token_count == 2)
    {
        written = asprintf(&import_part,
                           "    from %s import %s as obj",
                           tokens[0], tokens[1]);
    }
    else
    {
        written = asprintf(&import_part,
                           "    obj = globals().get('%s')\n"
                           "    if obj is None:\n"
                           "        raise ValueError('Global object not found: %s')",
                           code, code);
    }
    if (written < 0)
    {
        /* On failure the output pointer is unspecified; do not use it. */
        return NULL;
    }

    char *python_code = NULL;
    if (asprintf(&python_code, template, import_part) < 0)
    {
        python_code = NULL;
    }
    free(import_part);

    return python_code;
}
PyEval_GetGlobals() : PyDict_New(); + PyObject *locals = PyDict_New(); + + if (!globals || !locals) { + if (!use_globals && globals) Py_DECREF(globals); + if (locals) Py_DECREF(locals); + return set_error_message(error_message, "Failed to create Python dictionaries"); + } + + PyObject *result = PyRun_String(python_code, Py_file_input, globals, locals); + if (!result) { + PyObject *ptype, *pvalue, *ptraceback; + PyErr_Fetch(&ptype, &pvalue, &ptraceback); + PyErr_NormalizeException(&ptype, &pvalue, &ptraceback); + + if (pvalue) { + PyObject *py_str = PyObject_Str(pvalue); + if (py_str) { + const char *str_error = PyUnicode_AsUTF8(py_str); + set_error_message(error_message, "Python error: %s", str_error ? str_error : "Unknown error"); + Py_DECREF(py_str); + } + } + + Py_XDECREF(ptype); + Py_XDECREF(pvalue); + Py_XDECREF(ptraceback); + PyErr_Clear(); + + if (!use_globals) Py_DECREF(globals); + Py_DECREF(locals); + return 1; + } + Py_DECREF(result); + + PyObject *py_address = PyDict_GetItemString(locals, "address"); + PyObject *py_is_native = PyDict_GetItemString(locals, "is_native"); + + if (!py_address || !py_is_native) { + if (!use_globals) Py_DECREF(globals); + Py_DECREF(locals); + return set_error_message(error_message, "Failed to get address or is_native from execution"); + } + + *address = PyLong_AsLongLong(py_address); + *is_native = PyLong_AsLongLong(py_is_native); + + if (!use_globals) Py_DECREF(globals); + Py_DECREF(locals); + return 0; +} + +static int GetFuncAddressByPython(const char *code, char **error_message, + int64_t *address, int *is_native) { + *error_message = NULL; + *address = 0; + *is_native = 0; + + if (!code || !*code) { + return set_error_message(error_message, "Empty or NULL code parameter"); + } + + char **tokens = NULL; + int token_count = 0; + if (!parse_input_string(code, &tokens, &token_count)) { + return set_error_message(error_message, "Failed to parse input string"); + } + + char *python_code = build_python_code(code, tokens, 
token_count); + if (!python_code) { + for (int i = 0; i < token_count; i++) free(tokens[i]); + free(tokens); + return set_error_message(error_message, "Failed to build Python code"); + } + + int use_globals = (token_count == 0); + int result = execute_python_code(python_code, use_globals, address, is_native, error_message); + + free(python_code); + for (int i = 0; i < token_count; i++) free(tokens[i]); + free(tokens); + + if (result == 0) { + set_error_message(error_message, "Get __code__ attribute for '%s' OK", code); + } + + return result; +} +static TracingData *receiveTracingData(int name) +{ + return pytorch_tracing_data_array + name; +} + +static void addTracingData(int name, const char *func_name) +{ + TracingData *v = receiveTracingData(name); + v->tag_name = name; + v->curr_data = systrace_get_empty_pytorch_tracing_data_array(name); + v->function_name = strdup(func_name); +} + +static void getGcInfo(PyTorchTracingData *data, PyObject *info) +{ + if (!PyDict_Check(info)) + return; + PyObject *collected = PyDict_GetItemString(info, "collected"); + PyObject *uncollectable = PyDict_GetItemString(info, "uncollectable"); + + if (collected && PyLong_Check(collected)) + { + data->payload.gc_debug[0] = PyLong_AsLong(collected); + } + else + { + data->payload.gc_debug[0] = -1; + } + + if (uncollectable && PyLong_Check(uncollectable)) + { + data->payload.gc_debug[1] = PyLong_AsLong(uncollectable); + } + else + { + data->payload.gc_debug[1] = -1; + } +} + +static void gcCallback(PyObject *phase, PyObject *info) +{ + pthread_mutex_lock(&mutex); + if (PyUnicode_CompareWithASCIIString(phase, "start") == 0 && start_tracing) + { + TracingData *tracing_data = receiveTracingData(PY_TRACING_GC); + PyTorchTracingDataArray *curr_data = tracing_data->curr_data; + if (curr_data->cur == PY_TRACING_BUFFER_SIZE) + { + systrace_return_pytorch_tracing_data_array( + curr_data, PY_TRACING_READY_POOL, PY_TRACING_GC); + tracing_data->curr_data = + 
systrace_get_empty_pytorch_tracing_data_array(PY_TRACING_GC); + curr_data = tracing_data->curr_data; + } + curr_data->data[curr_data->cur].start = getMsTime(); + pthread_mutex_unlock(&mutex); + } + else if (PyUnicode_CompareWithASCIIString(phase, "stop") == 0) + { + TracingData *tracing_data = receiveTracingData(PY_TRACING_GC); + if (start_tracing) + { + PyTorchTracingDataArray *curr_data = tracing_data->curr_data; + if (start_tracing) + { + curr_data->data[curr_data->cur].count = tracing_data->count; + curr_data->data[curr_data->cur].type = PAYLOAD_GC; + getGcInfo(curr_data->data + curr_data->cur, info); + curr_data->data[curr_data->cur++].end = getMsTime(); + } + curr_data->data[curr_data->cur].count = tracing_data->count; + curr_data->data[curr_data->cur].stage_id = global_stage_id; + curr_data->data[curr_data->cur++].end = getMsTime(); + } + tracing_data->count++; + } + pthread_mutex_unlock(&mutex); +} + +static PyObject *gcCallbackWrapper(PyObject *self, PyObject *args, + PyObject *kwargs) +{ + PyObject *phase, *info; + if (!PyArg_ParseTuple(args, "OO", &phase, &info)) + { + return NULL; + } + gcCallback(phase, info); + Py_RETURN_NONE; +} + +static PyTypeObject GcCallbackType = { + PyVarObject_HEAD_INIT(NULL, 0) "gc_callback", /* tp_name */ + sizeof(PyObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + 0, /* tp_dealloc */ + 0, /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + gcCallbackWrapper, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* 
tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ +}; + +PyTorchTracingDataArray * +systrace_get_partial_pytorch_tracing_data_array(int name) +{ + pthread_mutex_lock(&mutex); + TracingData *tracing_data = receiveTracingData(name); + if ((!tracing_data || !tracing_data->curr_data) || + (tracing_data->curr_data->cur == 0)) + { + pthread_mutex_unlock(&mutex); + return NULL; + } + PyTorchTracingDataArray *result = tracing_data->curr_data; + tracing_data->curr_data = + systrace_get_empty_pytorch_tracing_data_array(name); + pthread_mutex_unlock(&mutex); + return result; +} + +void systrace_register_gc(char **error_message) +{ + addTracingData(PY_TRACING_GC, "GC"); + PyObject *gc_module = PyImport_ImportModule("gc"); + if (!gc_module) + { + return; + } + + PyObject *callbacks_list = PyObject_GetAttrString(gc_module, "callbacks"); + if (!callbacks_list || !PyList_Check(callbacks_list)) + { + Py_XDECREF(callbacks_list); + Py_DECREF(gc_module); + return; + } + + PyObject *py_callback = PyObject_New(PyObject, &GcCallbackType); + + if (!py_callback) + { + Py_DECREF(callbacks_list); + Py_DECREF(gc_module); + return; + } + + if (PyList_Append(callbacks_list, py_callback) != 0) + { + Py_DECREF(py_callback); + Py_DECREF(callbacks_list); + Py_DECREF(gc_module); + return; + } + + Py_DECREF(callbacks_list); + Py_DECREF(gc_module); + *error_message = strdup("Import gc Ok"); +} + +static void init_tracing_data_array(int count) +{ + tracing_data_count = count; + pytorch_tracing_data_array = + (TracingData *)malloc(sizeof(TracingData) * tracing_data_count); + memset(pytorch_tracing_data_array, 0, + sizeof(TracingData) * tracing_data_count); +} + +void systrace_register_tracing(const char **names, int count, char **errors) +{ + ensure_python_initialized(); + + PyGILState_STATE gstate = PyGILState_Ensure(); + + init_tracing_data_array(count); + systrace_register_gc(errors); + + for (int i = 1; i < count; i++) + { + 
register_tracing_function(names[i], i, errors); + } + + set_profiler_for_all_threads(); + + PyGILState_Release(gstate); +} \ No newline at end of file diff --git a/systrace/src/trace/python/pytorch_tracing.h b/systrace/src/trace/python/pytorch_tracing.h new file mode 100644 index 0000000000000000000000000000000000000000..5209b2886cfb5d4c247c4b3becf01d076044e4a6 --- /dev/null +++ b/systrace/src/trace/python/pytorch_tracing.h @@ -0,0 +1,69 @@ +#include +#include +#include +#include +#include +#include + +#include "../../../include/common/shared_constants.h" +#include "../../../thirdparty/uthash.h" +#include "pytorch_tracing_data.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + __attribute__((visibility("default"))) PyTorchTracingDataArray * + systrace_get_empty_pytorch_tracing_data_array(int); + __attribute__((visibility("default"))) PyTorchTracingDataArray * + systrace_get_full_pytorch_tracing_data_array(int); + + __attribute__((visibility("default"))) PyTorchTracingDataArray * + systrace_get_partial_pytorch_tracing_data_array(int); + + __attribute__((visibility("default"))) void + systrace_return_pytorch_tracing_data_array(PyTorchTracingDataArray *, + int type, int name); + __attribute__((visibility("default"))) void + systrace_register_tracing(const char **, int, char **); +#ifdef __cplusplus +} +#endif +typedef struct +{ + int64_t py_code_address; + const char *function_name; + int tag_name; + int is_native; + UT_hash_handle hh; +} TracingFunction; + +typedef struct +{ + int tag_name; + PyTorchTracingDataArray *curr_data; + int64_t count; + const char *function_name; +} TracingData; + +typedef struct _frame PyFrameObject; +uint64_t getCodeOfFrame(PyFrameObject *frame); +static void capture_stack(PyFrameObject *frame, + PyTorchTracingData *trace_entry); + +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + +static TracingData *pytorch_tracing_data_array = NULL; + +static TracingFunction *pytorch_tracing_func_map = NULL; +static int start_tracing = 1; 
+static int tracing_data_count = 0; + +static int GetFuncAddressByPython(const char *input, char **error_message, + int64_t *code_address, int *is_native); +static uint64_t getMsTime(); +static TracingFunction *isTracedPyTorchFunction(PyFrameObject *frame); +static TracingData *receiveTracingData(int name); +static void addTracingData(int name, const char *func_name); +static int profiler(PyObject *obj, PyFrameObject *frame, int what, + PyObject *arg); \ No newline at end of file diff --git a/systrace/src/trace/python/pytorch_tracing_data.h b/systrace/src/trace/python/pytorch_tracing_data.h new file mode 100644 index 0000000000000000000000000000000000000000..f8a601df9eaae87e866ba9fe32bfa140c7ab8ddb --- /dev/null +++ b/systrace/src/trace/python/pytorch_tracing_data.h @@ -0,0 +1,54 @@ +#pragma once +#include + +#ifndef PY_TRACING_BUFFER_SIZE +#define PY_TRACING_BUFFER_SIZE 512 +#define PY_TRACING_MAX_THREADS 256 +#endif +#define PY_TRACING_READY_POOL 0 +#define PY_TRACING_EMPTY_POOL 1 +#define PY_TRACING_GC 0 +#define PY_DATALOADER 1 + +#define MAX_STACK_DEPTH 32 +#define MAX_STACK_FRAME_LENGTH 256 + +typedef enum +{ + PAYLOAD_UNINITIALIZED = 0, + PAYLOAD_GC = 1, +} PayloadType; + +typedef enum +{ + UNKNOWN = 0, + DATALOADER, + FORWARD, + BACKWARD, + SYNCHRONIZATION, + GC, +} Stagetype; + +typedef union +{ + int gc_debug[2]; +} Payload; + +typedef struct +{ + uint64_t start; + uint64_t end; + uint32_t count; + uint32_t stage_id; + Stagetype stage_type; + Payload payload; + PayloadType type; + char stack_info[MAX_STACK_DEPTH][256]; + int stack_depth; +} PyTorchTracingData; + +typedef struct +{ + PyTorchTracingData data[PY_TRACING_BUFFER_SIZE]; + uint64_t cur; +} PyTorchTracingDataArray; \ No newline at end of file diff --git a/systrace/src/trace/python/pytorch_tracing_loader.cc b/systrace/src/trace/python/pytorch_tracing_loader.cc new file mode 100644 index 0000000000000000000000000000000000000000..fab92ffbc4068dcf0446108f8e5d379faa8c1944 --- /dev/null +++ 
b/systrace/src/trace/python/pytorch_tracing_loader.cc @@ -0,0 +1,121 @@ +#include "pytorch_tracing_loader.h" +#include "../../../include/common/logging.h" +#include +#include + +namespace systrace +{ +namespace pytorch_tracing +{ + +PyTorchTracingLibrary::PyTorchTracingLibrary(const std::string &library_path) + : DynamicLibraryLoader(library_path), register_tracing_(nullptr), + get_tracing_data_(nullptr), get_partial_tracing_data_(nullptr), + return_tracing_data_(nullptr) +{ + if (library_handle_) + { + InitializeSymbols(); + } +} + +void PyTorchTracingLibrary::InitializeSymbols() +{ + std::vector configs = { + {"systrace_register_tracing", + [this]() { return reinterpret_cast(®ister_tracing_); }, + "TracingRegistrationFunc"}, + + {"systrace_get_full_pytorch_tracing_data_array", + [this]() { return reinterpret_cast(&get_tracing_data_); }, + "DataArrayRetrievalAllFunc"}, + + {"systrace_return_pytorch_tracing_data_array", + [this]() { return reinterpret_cast(&return_tracing_data_); }, + "DataArrayReleaseFunc"}, + + {"systrace_get_partial_pytorch_tracing_data_array", [this]() + { return reinterpret_cast(&get_partial_tracing_data_); }, + "GetPartialTracingDataArrayPartFunc"}}; + + is_usable_ = std::all_of(configs.begin(), configs.end(), + [this](const SymbolConfig &config) + { return LoadSymbol(config); }); +} + +bool PyTorchTracingLibrary::LoadSymbol(const SymbolConfig &config) +{ + void *symbol = dlsym(library_handle_, config.name); + if (!symbol) + { + STLOG(WARNING) << "Failed to load symbol: " << config.name + << " (type: " << config.type_name + << "), error: " << dlerror(); + return false; + } + + *reinterpret_cast(config.loader()) = symbol; + return true; +} + +std::vector +PyTorchTracingLibrary::Register(const std::vector &names) +{ + if (!is_usable_) + { + return {}; + } + + auto error_holder = std::unique_ptr>( + new char *[names.size()], + [size = names.size()](char **ptr) + { + for (size_t i = 0; i < size; ++i) + { + free(ptr[i]); + } + delete[] ptr; + }); 
+ std::memset(error_holder.get(), 0, names.size() * sizeof(char *)); + + std::vector c_str_array; + c_str_array.reserve(names.size()); + std::transform(names.begin(), names.end(), std::back_inserter(c_str_array), + [](const std::string &str) { return str.c_str(); }); + + register_tracing_(c_str_array.data(), c_str_array.size(), + error_holder.get()); + + std::vector result; + for (size_t i = 0; i < names.size(); ++i) + { + if (error_holder[i]) + { + result.emplace_back(error_holder[i]); + } + } + return result; +} + +PyTorchTracingDataArray *PyTorchTracingLibrary::RetrieveAllTracingData(int name) +{ + return is_usable_ ? get_tracing_data_(name) : nullptr; +} + +PyTorchTracingDataArray * +PyTorchTracingLibrary::RetrievePartialTracingData(int name) +{ + return is_usable_ ? get_partial_tracing_data_(name) : nullptr; +} + +void PyTorchTracingLibrary::ReleaseTracingData(PyTorchTracingDataArray *data, + int type, int name) +{ + if (is_usable_ && data) + { + return_tracing_data_(data, type, name); + } +} + +} // namespace pytorch_tracing +} // namespace systrace \ No newline at end of file diff --git a/systrace/src/trace/python/pytorch_tracing_loader.h b/systrace/src/trace/python/pytorch_tracing_loader.h new file mode 100644 index 0000000000000000000000000000000000000000..9d5aaa49855cd9314f92bd69c622b8d5ed88f5d7 --- /dev/null +++ b/systrace/src/trace/python/pytorch_tracing_loader.h @@ -0,0 +1,45 @@ +#pragma once + +#include +#include + +#include "../../../include/common/macro.h" +#include "../library_loader.h" +#include "pytorch_tracing_data.h" + +namespace systrace +{ +namespace pytorch_tracing +{ + +class PyTorchTracingLibrary : public DynamicLibraryLoader +{ + public: + explicit PyTorchTracingLibrary(const std::string &); + using TracingRegistrationFunc = void (*)(const char **, int, char **); + using DataArrayRetrievalAllFunc = PyTorchTracingDataArray *(*)(int); + using GetPartialTracingDataArrayPartFunc = + PyTorchTracingDataArray *(*)(int); + using 
DataArrayReleaseFunc = void (*)(PyTorchTracingDataArray *, int, int); + PyTorchTracingDataArray *RetrieveAllTracingData(int); + PyTorchTracingDataArray *RetrievePartialTracingData(int); + std::vector Register(const std::vector &names); + void ReleaseTracingData(PyTorchTracingDataArray *data, int type, int name); + + private: + TracingRegistrationFunc register_tracing_; + DataArrayRetrievalAllFunc get_tracing_data_; + GetPartialTracingDataArrayPartFunc get_partial_tracing_data_; + DataArrayReleaseFunc return_tracing_data_; + void InitializeSymbols(); + struct SymbolConfig + { + const char *name; + std::function loader; + const char *type_name; + }; + bool LoadSymbol(const SymbolConfig &config); +}; + +} // namespace pytorch_tracing +} // namespace systrace \ No newline at end of file diff --git a/systrace/src/trace/python/pytorch_tracing_manager.cc b/systrace/src/trace/python/pytorch_tracing_manager.cc new file mode 100644 index 0000000000000000000000000000000000000000..3be99ca325ce956898381905de654cecf18ae193 --- /dev/null +++ b/systrace/src/trace/python/pytorch_tracing_manager.cc @@ -0,0 +1,58 @@ +#include "pytorch_tracing_manager.h" +#include "pytorch_tracing_data.h" +#include +#include + +namespace systrace +{ +namespace pytorch_tracing_manager +{ + +PyTorchTracingManager &PyTorchTracingManager::getInstance() +{ + std::call_once(init_flag_, &PyTorchTracingManager::initSingleton); + return *instance_; +} + +void PyTorchTracingManager::initSingleton() +{ + instance_ = new PyTorchTracingManager(); +} + +PyTorchTracingDataArray * +PyTorchTracingManager::getEmptyPyTorchTracingDataArray(int name) +{ + auto &pool_item = pool_[name]; + auto *data = pool_item.empty_pool.getObject(); + std::memset(data, 0, sizeof(PyTorchTracingDataArray)); + return data; +} + +void PyTorchTracingManager::returnPyTorchTracingDataArray( + PyTorchTracingDataArray *array, int type, int name) +{ + + if (!array) + return; + + auto &pool_item = pool_[name]; + int pool_queue_size = 0; + + switch 
(type) + { + case PY_TRACING_READY_POOL: + pool_item.ready_pool.returnObject(array, &pool_queue_size); + break; + case PY_TRACING_EMPTY_POOL: + pool_item.empty_pool.returnObject(array, &pool_queue_size); + break; + } +} + +PyTorchTracingDataArray * +PyTorchTracingManager::getPyTorchTracingDataArray(int name) +{ + return pool_[name].ready_pool.getObject(); +} +} // namespace pytorch_tracing_manager +} // namespace systrace \ No newline at end of file diff --git a/systrace/src/trace/python/pytorch_tracing_manager.h b/systrace/src/trace/python/pytorch_tracing_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..ead4e5a72690b17ee7a66ad0c93c464afcc90a72 --- /dev/null +++ b/systrace/src/trace/python/pytorch_tracing_manager.h @@ -0,0 +1,73 @@ +#pragma once +#include +#include +#include + +#include "../../../include/common/util.h" +#include "pytorch_tracing.h" +#include "pytorch_tracing_data.h" + +namespace systrace +{ +namespace pytorch_tracing_manager +{ + +class PyTorchTracingManager +{ + public: + PyTorchTracingManager(const PyTorchTracingManager &) = delete; + PyTorchTracingManager &operator=(const PyTorchTracingManager &) = delete; + static void initSingleton(); + static PyTorchTracingManager &getInstance(); + + PyTorchTracingDataArray *getEmptyPyTorchTracingDataArray(int name); + void returnPyTorchTracingDataArray(PyTorchTracingDataArray *, int, + int name); + PyTorchTracingDataArray *getPyTorchTracingDataArray(int name); + PyTorchTracingDataArray *getCurPyTorchTracingDataArray(int name); + + private: + PyTorchTracingManager() = default; + inline static PyTorchTracingManager *instance_ = nullptr; + inline static std::once_flag init_flag_; + struct Pool + { + util::resource::TimerPool empty_pool; + util::resource::TimerPool ready_pool; + }; + std::unordered_map pool_; +}; +} // namespace pytorch_tracing_manager +} // namespace systrace + +#ifdef __cplusplus +extern "C" +{ +#endif + PyTorchTracingDataArray * + 
systrace_get_empty_pytorch_tracing_data_array(int name) + { + return systrace::pytorch_tracing_manager::PyTorchTracingManager:: + getInstance() + .getEmptyPyTorchTracingDataArray(name); + } + + PyTorchTracingDataArray * + systrace_get_full_pytorch_tracing_data_array(int name) + { + return systrace::pytorch_tracing_manager::PyTorchTracingManager:: + getInstance() + .getPyTorchTracingDataArray(name); + } + + void + systrace_return_pytorch_tracing_data_array(PyTorchTracingDataArray *array, + int type, int name) + { + systrace::pytorch_tracing_manager::PyTorchTracingManager::getInstance() + .returnPyTorchTracingDataArray(array, type, name); + } + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/systrace/src/trace/systrace_manager.cc b/systrace/src/trace/systrace_manager.cc new file mode 100644 index 0000000000000000000000000000000000000000..eef5606365b7e98465a732612ed575a82b767897 --- /dev/null +++ b/systrace/src/trace/systrace_manager.cc @@ -0,0 +1,243 @@ +#include +#include +#include +#include + +#include "../../include/common/constant.h" +#include "../../include/common/shared_constants.h" +#include "systrace_manager.h" + +int global_stage_id = 0; +int global_stage_type = 0; +namespace systrace +{ + +namespace +{ +constexpr uint64_t TRACE_INTERVAL = 100; +constexpr std::chrono::milliseconds POLL_INTERVAL(10); +} // namespace + +PyTorchTrace &PyTorchTrace::getInstance() +{ + std::call_once(init_flag_, + []() + { + instance_ = new PyTorchTrace(); + instance_->initialize(); + }); + return *instance_; +} + +void PyTorchTrace::initialize() +{ + pytorch_trace_.set_rank(config::GlobalConfig::Instance().rank); + STLOG(INFO) << "[PyTorchTrace] Rank set to: " + << config::GlobalConfig::Instance().rank; + + pytorch_tracing_library_ = + new pytorch_tracing::PyTorchTracingLibrary("libsysTrace.so"); + STLOG(INFO) << "[PyTorchTrace] Tracing library loaded"; + + registerTracingFunctions(); +} + +void PyTorchTrace::registerTracingFunctions() +{ + 
pytorch_tracing_functions_ = { + "GC", + "torch.utils.data.dataloader@_BaseDataLoaderIter@__next__", + "torch_npu@npu@synchronize", + "torch_npu.npu@Event@synchronize", + "torch_npu.npu@Event@wait", + "torch_npu.npu@Stream@synchronize", + "torch_npu.npu@Stream@wait_event", + "torch_npu.npu@Stream@wait_stream", + "torch@autograd@backward", + "torch@autograd@grad", + "megatron.core.pipeline_parallel@schedules@forward_step", + "megatron.core.pipeline_parallel@schedules@backward_step"}; + + auto errors = + pytorch_tracing_library_->Register(pytorch_tracing_functions_); + for (size_t i = 0; i < pytorch_tracing_functions_.size(); ++i) + { + STLOG(INFO) << "Registered function: " << pytorch_tracing_functions_[i] + << ", status: " << errors[i]; + } +} + +bool PyTorchTrace::triggerTrace() { return has_trigger_trace_.exchange(true); } + +void PyTorchTrace::dumpPyTorchTracing() +{ + const std::string &dump_path = + std::string(constant::TorchTraceConstant::DEFAULT_TRACE_DUMP_PATH); + + if (util::fs_utils::CreateDirectoryIfNotExists(dump_path)) + { + STLOG(ERROR) << "[PyTorchTrace] Failed to create dump directory"; + return; + } + + std::lock_guard lock(trace_mutex_); + + pytorch_trace_.set_rank(config::GlobalConfig::Instance().local_rank); + pytorch_trace_.set_comm(config::GlobalConfig::Instance().job_name); + + for (size_t i = 0; i < pytorch_tracing_functions_.size(); ++i) + { + processFunctionTracingData(i); + } + + writeTraceToFile(); +} + +void PyTorchTrace::processFunctionTracingData(size_t function_index) +{ + std::vector data_holders; + + if (auto data = pytorch_tracing_library_->RetrievePartialTracingData( + function_index)) + { + data_holders.push_back(data); + } + + while (auto data = + pytorch_tracing_library_->RetrieveAllTracingData(function_index)) + { + data_holders.push_back(data); + } + + for (auto data : data_holders) + { + for (uint32_t i = 0; i < data->cur; ++i) + { + if (data->data[i].start == 0) + continue; + + auto trace = 
pytorch_trace_.add_pytorch_stages(); + trace->set_start_us(data->data[i].start); + trace->set_end_us(data->data[i].end); + trace->set_stage_id(data->data[i].count); + trace->set_stage_type(pytorch_tracing_functions_[function_index]); + + if (data->data[i].stack_depth > 0) + { + trace->mutable_stack_frames()->Reserve( + data->data[i].stack_depth); + for (int j = 0; j < data->data[i].stack_depth; ++j) + { + if (data->data[i].stack_info[j][0] != '\0') + { + trace->add_stack_frames(data->data[i].stack_info[j]); + } + } + } + + if (data->data[i].type == PAYLOAD_GC) + { + auto gc_debug = trace->mutable_gc_debug(); + gc_debug->set_collected(data->data[i].payload.gc_debug[0]); + gc_debug->set_uncollectable(data->data[i].payload.gc_debug[1]); + } + } + } + + for (auto data : data_holders) + { + pytorch_tracing_library_->ReleaseTracingData( + data, PY_TRACING_EMPTY_POOL, function_index); + } +} + +void PyTorchTrace::writeTraceToFile() +{ + const std::string &dump_path = + std::string(constant::TorchTraceConstant::DEFAULT_TRACE_DUMP_PATH); + std::string file_path = + dump_path + "/" + + util::fs_utils::GenerateClusterUniqueFilename(".timeline"); + + std::ofstream file(file_path, std::ios::binary | std::ios::out); + if (!file) + { + STLOG(ERROR) << "[PyTorchTrace] Failed to open file: " << file_path; + return; + } + + std::string binary_data; + if (!pytorch_trace_.SerializeToString(&binary_data)) + { + STLOG(ERROR) << "[PyTorchTrace] Failed to serialize trace data"; + return; + } + + file << binary_data; +} + +SysTrace &SysTrace::getInstance() +{ + std::call_once(init_flag_, + []() + { + instance_ = new SysTrace(); + instance_->initializeSystem(); + }); + return *instance_; +} + +SysTrace::~SysTrace() { stopEventPoller(); } + +void SysTrace::initializeSystem() +{ + if (!config::GlobalConfig::Instance().enable) + return; + + systrace::util::InitializeSystemUtilities(); + MSPTITracker::getInstance(); + PyTorchTrace::getInstance(); + + startEventPoller(); +} + +void 
SysTrace::startEventPoller() +{ +#ifdef _GNU_SOURCE + should_run_ = true; + event_poller_ = std::thread(&SysTrace::eventPollerMain, this); + pthread_setname_np(event_poller_.native_handle(), "systrace_poller"); +#endif + STLOG(INFO) << "[SysTrace] Event poller started"; +} + +void SysTrace::stopEventPoller() +{ + should_run_ = false; + if (event_poller_.joinable()) + { + event_poller_.join(); + } +} + +void SysTrace::eventPollerMain() +{ + while (should_run_) + { + if (loop_count_++ % TRACE_INTERVAL == 0) + { + if (PyTorchTrace::getInstance().triggerTrace()) + { + PyTorchTrace::getInstance().dumpPyTorchTracing(); + } + } + std::this_thread::sleep_for(POLL_INTERVAL); + } + + if (PyTorchTrace::getInstance().triggerTrace()) + { + PyTorchTrace::getInstance().dumpPyTorchTracing(); + } +} + +} // namespace systrace \ No newline at end of file diff --git a/systrace/src/trace/systrace_manager.h b/systrace/src/trace/systrace_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..c043aba0b1985269e8eebc963d77e98b0403f06c --- /dev/null +++ b/systrace/src/trace/systrace_manager.h @@ -0,0 +1,76 @@ +#pragma once +#include +#include +#include +#include +#include + +#include "../../include/common/logging.h" +#include "../../include/common/util.h" +#include "../../protos/systrace.pb.h" +#include "../mspti/mspti_tracker.hpp" +#include "library_loader.h" +#include "python/pytorch_tracing_loader.h" + +namespace systrace +{ +using namespace util; + +class PyTorchTrace +{ + public: + static PyTorchTrace &getInstance(); + + void dumpPyTorchTracing(); + void dumpPyTorchTracing(bool incremental, bool async); + bool triggerTrace(); + + PyTorchTrace(const PyTorchTrace &) = delete; + PyTorchTrace &operator=(const PyTorchTrace &) = delete; + + private: + PyTorchTrace() = default; + ~PyTorchTrace() = default; + + void initialize(); + void registerTracingFunctions(); + void processFunctionTracingData(size_t function_index); + void writeTraceToFile(); + + inline static 
PyTorchTrace *instance_ = nullptr; + inline static std::once_flag init_flag_; + + Pytorch pytorch_trace_; + std::atomic has_trigger_trace_{false}; + std::mutex trace_mutex_; + + std::vector pytorch_tracing_functions_; + pytorch_tracing::PyTorchTracingLibrary *pytorch_tracing_library_; +}; + +class SysTrace +{ + public: + static SysTrace &getInstance(); + + SysTrace(const SysTrace &) = delete; + SysTrace &operator=(const SysTrace &) = delete; + + private: + SysTrace() = default; + ~SysTrace(); + + void initializeSystem(); + void startEventPoller(); + void stopEventPoller(); + void eventPollerMain(); + + inline static SysTrace *instance_ = nullptr; + inline static std::once_flag init_flag_; + + std::atomic should_run_{true}; + std::atomic loop_count_{0}; + std::thread event_poller_; +}; + +} // namespace systrace \ No newline at end of file diff --git a/systrace/thirdparty/aarch64/libunwind/libunwind-aarch64.h b/systrace/thirdparty/aarch64/libunwind/libunwind-aarch64.h new file mode 100644 index 0000000000000000000000000000000000000000..f794600637a80430269e472d312a04559dc34e1a --- /dev/null +++ b/systrace/thirdparty/aarch64/libunwind/libunwind-aarch64.h @@ -0,0 +1,291 @@ +/* libunwind - a platform-independent unwind library + Copyright (C) 2001-2004 Hewlett-Packard Co + Contributed by David Mosberger-Tang + Copyright (C) 2013 Linaro Limited + Copyright 2022 Blackberry Limited + +This file is part of libunwind. 
+ +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef LIBUNWIND_H +#define LIBUNWIND_H + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" +{ +#endif + +#include +#include +#include +#include +#include + +#ifndef UNW_EMPTY_STRUCT +#define UNW_EMPTY_STRUCT uint8_t unused; +#endif + +#define UNW_TARGET aarch64 +#define UNW_TARGET_AARCH64 1 + +#define _U_TDEP_QP_TRUE 0 /* see libunwind-dynamic.h */ + + /* This needs to be big enough to accommodate "struct cursor", while + leaving some slack for future expansion. Changing this value will + require recompiling all users of this library. Stack allocation is + relatively cheap and unwind-state copying is relatively rare, so we + want to err on making it rather too big than too small. 
+ + Calculation is regs used (64 + 34) * 2 + 40 (bytes of rest of + cursor) + padding + */ + +#define UNW_TDEP_CURSOR_LEN 250 + + typedef uint64_t unw_word_t; + typedef int64_t unw_sword_t; + + typedef long double unw_tdep_fpreg_t; + +#define UNW_WORD_MAX UINT64_MAX + + typedef struct + { + /* no aarch64-specific auxiliary proc-info */ + UNW_EMPTY_STRUCT + } unw_tdep_proc_info_t; + + typedef enum + { + /* 64-bit general registers. */ + UNW_AARCH64_X0, + UNW_AARCH64_X1, + UNW_AARCH64_X2, + UNW_AARCH64_X3, + UNW_AARCH64_X4, + UNW_AARCH64_X5, + UNW_AARCH64_X6, + UNW_AARCH64_X7, + UNW_AARCH64_X8, + + /* Temporary registers. */ + UNW_AARCH64_X9, + UNW_AARCH64_X10, + UNW_AARCH64_X11, + UNW_AARCH64_X12, + UNW_AARCH64_X13, + UNW_AARCH64_X14, + UNW_AARCH64_X15, + + /* Intra-procedure-call temporary registers. */ + UNW_AARCH64_X16, + UNW_AARCH64_X17, + + /* Callee-saved registers. */ + UNW_AARCH64_X18, + UNW_AARCH64_X19, + UNW_AARCH64_X20, + UNW_AARCH64_X21, + UNW_AARCH64_X22, + UNW_AARCH64_X23, + UNW_AARCH64_X24, + UNW_AARCH64_X25, + UNW_AARCH64_X26, + UNW_AARCH64_X27, + UNW_AARCH64_X28, + + /* 64-bit frame pointer. */ + UNW_AARCH64_X29, + + /* 64-bit link register. */ + UNW_AARCH64_X30, + + /* 64-bit stack pointer. */ + UNW_AARCH64_SP = 31, + UNW_AARCH64_PC, + UNW_AARCH64_PSTATE, + + /* Pseudo-register */ + UNW_AARCH64_RA_SIGN_STATE = 34, + + /* SVE Vector Granule pseudo register */ + UNW_AARCH64_VG = 46, + + /* 128-bit FP/Advanced SIMD registers. 
*/ + UNW_AARCH64_V0 = 64, + UNW_AARCH64_V1, + UNW_AARCH64_V2, + UNW_AARCH64_V3, + UNW_AARCH64_V4, + UNW_AARCH64_V5, + UNW_AARCH64_V6, + UNW_AARCH64_V7, + UNW_AARCH64_V8, + UNW_AARCH64_V9, + UNW_AARCH64_V10, + UNW_AARCH64_V11, + UNW_AARCH64_V12, + UNW_AARCH64_V13, + UNW_AARCH64_V14, + UNW_AARCH64_V15, + UNW_AARCH64_V16, + UNW_AARCH64_V17, + UNW_AARCH64_V18, + UNW_AARCH64_V19, + UNW_AARCH64_V20, + UNW_AARCH64_V21, + UNW_AARCH64_V22, + UNW_AARCH64_V23, + UNW_AARCH64_V24, + UNW_AARCH64_V25, + UNW_AARCH64_V26, + UNW_AARCH64_V27, + UNW_AARCH64_V28, + UNW_AARCH64_V29, + UNW_AARCH64_V30, + UNW_AARCH64_V31, + + UNW_AARCH64_FPSR, + UNW_AARCH64_FPCR, + + /* For AArch64, the CFA is the value of SP (x31) at the call site of the + previous frame. */ + UNW_AARCH64_CFA = UNW_AARCH64_SP, + + UNW_TDEP_LAST_REG = UNW_AARCH64_FPCR, + + UNW_TDEP_IP = UNW_AARCH64_X30, + UNW_TDEP_SP = UNW_AARCH64_SP, + UNW_TDEP_EH = UNW_AARCH64_X0 + + } aarch64_regnum_t; + +/* Use R0 through R3 to pass exception handling information. */ +#define UNW_TDEP_NUM_EH_REGS 4 + + typedef struct unw_tdep_save_loc + { + /* Additional target-dependent info on a save location. */ + UNW_EMPTY_STRUCT + } unw_tdep_save_loc_t; + +#ifdef __linux__ + /* On AArch64, we can directly use ucontext_t as the unwind context, + * however, the __reserved struct is quite large: tune it down to only + * the necessary used fields. 
*/ + + struct unw_sigcontext + { + uint64_t fault_address; + uint64_t regs[31]; + uint64_t sp; + uint64_t pc; + uint64_t pstate; + alignas(16) uint8_t __reserved[(66 * 8)]; + }; + + typedef struct + { + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; +#ifndef __ANDROID__ + sigset_t uc_sigmask; +#else + union + { + sigset_t uc_sigmask; + sigset64_t uc_sigmask64; + }; + char __padding[128 - sizeof(sigset_t)]; +#endif + struct unw_sigcontext uc_mcontext; + } unw_tdep_context_t; + + typedef struct + { + uint32_t _ctx_magic; + uint32_t _ctx_size; + uint32_t fpsr; + uint32_t fpcr; + uint64_t vregs[64]; + } unw_fpsimd_context_t; +#else +/* On AArch64, we can directly use ucontext_t as the unwind context. */ +typedef ucontext_t unw_tdep_context_t; +#endif + +#include "libunwind-common.h" +#include "libunwind-dynamic.h" + +#if defined(__FreeBSD__) +#define UNW_BASE \ + register uint64_t unw_base __asm__("x0") = \ + (uint64_t)unw_ctx->uc_mcontext.mc_gpregs.gp_x; +#elif defined(__QNX__) +#define UNW_BASE \ + register uint64_t unw_base __asm__("x0") = \ + (uint64_t)unw_ctx->uc_mcontext.cpu.gpr; +#else +#define UNW_BASE \ + register uint64_t unw_base __asm__("x0") = \ + (uint64_t)unw_ctx->uc_mcontext.regs; +#endif + +#define unw_tdep_getcontext(uc) \ + ({ \ + unw_tdep_context_t *unw_ctx = (uc); \ + UNW_BASE \ + __asm__ __volatile__("stp x0, x1, [%[base], #0]\n" \ + "stp x2, x3, [%[base], #16]\n" \ + "stp x4, x5, [%[base], #32]\n" \ + "stp x6, x7, [%[base], #48]\n" \ + "stp x8, x9, [%[base], #64]\n" \ + "stp x10, x11, [%[base], #80]\n" \ + "stp x12, x13, [%[base], #96]\n" \ + "stp x14, x15, [%[base], #112]\n" \ + "stp x16, x17, [%[base], #128]\n" \ + "stp x18, x19, [%[base], #144]\n" \ + "stp x20, x21, [%[base], #160]\n" \ + "stp x22, x23, [%[base], #176]\n" \ + "stp x24, x25, [%[base], #192]\n" \ + "stp x26, x27, [%[base], #208]\n" \ + "stp x28, x29, [%[base], #224]\n" \ + "mov x1, sp\n" \ + "stp x30, x1, [%[base], #240]\n" \ + "adr x1, ret%=\n" \ + "str 
x1, [%[base], #256]\n" \ + "mov %[base], #0\n" \ + "ret%=:\n" \ + : [base] "+r"(unw_base) \ + : \ + : "x1", "memory"); \ + (int)unw_base; \ + }) +#define unw_tdep_is_fpreg UNW_ARCH_OBJ(is_fpreg) + + extern int unw_tdep_is_fpreg(int); + +#if defined(__cplusplus) || defined(c_plusplus) +} +#endif + +#endif /* LIBUNWIND_H */ diff --git a/systrace/thirdparty/aarch64/libunwind/libunwind-common.h b/systrace/thirdparty/aarch64/libunwind/libunwind-common.h new file mode 100644 index 0000000000000000000000000000000000000000..9c0db22b11df3075b718e08ceb5c89f6d4df57b0 --- /dev/null +++ b/systrace/thirdparty/aarch64/libunwind/libunwind-common.h @@ -0,0 +1,335 @@ +/* libunwind - a platform-independent unwind library + Copyright (C) 2001-2004 Hewlett-Packard Co + Contributed by David Mosberger-Tang + +This file is part of libunwind. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#define UNW_VERSION_MAJOR 1 +#define UNW_VERSION_MINOR 9 +#define UNW_VERSION_EXTRA -pre + +#define UNW_VERSION_CODE(maj, min) (((maj) << 16) | (min)) +#define UNW_VERSION UNW_VERSION_CODE(UNW_VERSION_MAJOR, UNW_VERSION_MINOR) + +#ifdef __sun +// On SmartOS, gcc fails with the following error: +// +// ../include/libunwind-common.h:43:41: error: expected identifier or '(' before +// numeric constant # define UNW_PREFIX UNW_PASTE(UNW_PASTE(_U,UNW_TARGET),_) +// ^ +// +// workaround is to undefine _U explicitly. +// see https://github.com/libunwind/libunwind/issues/118 for more details. +// +#undef _U +#endif + +#define UNW_PASTE2(x, y) x##y +#define UNW_PASTE(x, y) UNW_PASTE2(x, y) +#define UNW_OBJ(fn) UNW_PASTE(UNW_PREFIX, fn) +#define UNW_ARCH_OBJ(fn) UNW_PASTE(UNW_PASTE(UNW_PASTE(_U, UNW_TARGET), _), fn) + +#ifdef UNW_LOCAL_ONLY +#define UNW_PREFIX UNW_PASTE(UNW_PASTE(_UL, UNW_TARGET), _) +#else /* !UNW_LOCAL_ONLY */ +#define UNW_PREFIX UNW_PASTE(UNW_PASTE(_U, UNW_TARGET), _) +#endif /* !UNW_LOCAL_ONLY */ + +/* Error codes. The unwind routines return the *negated* values of + these error codes on error and a non-negative value on success. */ +typedef enum +{ + UNW_ESUCCESS = 0, /* no error */ + UNW_EUNSPEC, /* unspecified (general) error */ + UNW_ENOMEM, /* out of memory */ + UNW_EBADREG, /* bad register number */ + UNW_EREADONLYREG, /* attempt to write read-only register */ + UNW_ESTOPUNWIND, /* stop unwinding */ + UNW_EINVALIDIP, /* invalid IP */ + UNW_EBADFRAME, /* bad frame */ + UNW_EINVAL, /* unsupported operation or bad value */ + UNW_EBADVERSION, /* unwind info has unsupported version */ + UNW_ENOINFO /* no unwind info found */ +} unw_error_t; + +/* The following enum defines the indices for a couple of + (pseudo-)registers which have the same meaning across all + platforms. (RO) means read-only. (RW) means read-write. General + registers (aka "integer registers") are expected to start with + index 0. 
The number of such registers is architecture-dependent. + The remaining indices can be used as an architecture sees fit. The + last valid register index is given by UNW_REG_LAST. */ +typedef enum +{ + UNW_REG_IP = UNW_TDEP_IP, /* (rw) instruction pointer (pc) */ + UNW_REG_SP = UNW_TDEP_SP, /* (ro) stack pointer */ + UNW_REG_EH = UNW_TDEP_EH, /* (rw) exception-handling reg base */ + UNW_REG_LAST = UNW_TDEP_LAST_REG +} unw_frame_regnum_t; + +/* Number of exception-handler argument registers: */ +#define UNW_NUM_EH_REGS UNW_TDEP_NUM_EH_REGS + +typedef enum +{ + UNW_CACHE_NONE, /* no caching */ + UNW_CACHE_GLOBAL, /* shared global cache */ + UNW_CACHE_PER_THREAD /* per-thread caching */ +} unw_caching_policy_t; + +typedef enum +{ + UNW_INIT_SIGNAL_FRAME = 1 /* We know this is a signal frame */ +} unw_init_local2_flags_t; + +typedef int unw_regnum_t; + +/* The unwind cursor starts at the youngest (most deeply nested) frame + and is used to track the frame state as the unwinder steps from + frame to frame. It is safe to make (shallow) copies of variables + of this type. */ +typedef struct unw_cursor +{ + unw_word_t opaque[UNW_TDEP_CURSOR_LEN]; +} unw_cursor_t; + +/* This type encapsulates the entire (preserved) machine-state. */ +typedef unw_tdep_context_t unw_context_t; + +/* unw_getcontext() fills the unw_context_t pointed to by UC with the + machine state as it exists at the call-site. For implementation + reasons, this needs to be a target-dependent macro. It's easiest + to think of unw_getcontext() as being identical to getcontext(). */ +#define unw_getcontext(uc) unw_tdep_getcontext(uc) + +/* Return 1 if register number R is a floating-point register, zero + otherwise. + This routine is signal-safe. */ +#define unw_is_fpreg(r) unw_tdep_is_fpreg(r) + +typedef unw_tdep_fpreg_t unw_fpreg_t; + +typedef struct unw_addr_space *unw_addr_space_t; + +/* Each target may define it's own set of flags, but bits 0-15 are + reserved for general libunwind-use. 
*/ +#define UNW_PI_FLAG_FIRST_TDEP_BIT 16 +/* The information comes from a .debug_frame section. */ +#define UNW_PI_FLAG_DEBUG_FRAME 32 + +typedef struct unw_proc_info +{ + unw_word_t start_ip; /* first IP covered by this procedure */ + unw_word_t end_ip; /* first IP NOT covered by this procedure */ +#if defined(NEED_LAST_IP) + unw_word_t last_ip; /* first IP that could begin another procedure */ +#endif + unw_word_t lsda; /* address of lang.-spec. data area (if any) */ + unw_word_t handler; /* optional personality routine */ + unw_word_t gp; /* global-pointer value for this procedure */ + unw_word_t flags; /* misc. flags */ + + int format; /* unwind-info format (arch-specific) */ + int unwind_info_size; /* size of the information (if applicable) */ + void *unwind_info; /* unwind-info (arch-specific) */ + unw_tdep_proc_info_t extra; /* target-dependent auxiliary proc-info */ +} unw_proc_info_t; + +typedef int (*unw_reg_states_callback)(void *token, void *reg_states_data, + size_t reg_states_data_size, + unw_word_t start_ip, unw_word_t end_ip); + +/* These are backend callback routines that provide access to the + state of a "remote" process. This can be used, for example, to + unwind another process through the ptrace() interface. */ +typedef struct unw_accessors +{ + /* Look up the unwind info associated with instruction-pointer IP. + On success, the routine fills in the PROC_INFO structure. */ + int (*find_proc_info)(unw_addr_space_t, unw_word_t, unw_proc_info_t *, int, + void *); + + /* Release any resources (e.g., memory) that were allocated for + the unwind info returned in by a previous call to + find_proc_info() with NEED_UNWIND_INFO set to 1. */ + void (*put_unwind_info)(unw_addr_space_t, unw_proc_info_t *, void *); + + /* Return the list-head of the dynamically registered unwind + info. */ + int (*get_dyn_info_list_addr)(unw_addr_space_t, unw_word_t *, void *); + + /* Access aligned word at address ADDR. 
The value is returned + according to the endianness of the host (e.g., if the host is + little-endian and the target is big-endian, access_mem() needs + to byte-swap the value before returning it). */ + int (*access_mem)(unw_addr_space_t, unw_word_t, unw_word_t *, int, void *); + + /* Access register number REG at address ADDR. */ + int (*access_reg)(unw_addr_space_t, unw_regnum_t, unw_word_t *, int, + void *); + + /* Access register number REG at address ADDR. */ + int (*access_fpreg)(unw_addr_space_t, unw_regnum_t, unw_fpreg_t *, int, + void *); + + int (*resume)(unw_addr_space_t, unw_cursor_t *, void *); + + /* Optional call back to obtain the name of a (static) procedure. + Dynamically generated procedures are handled automatically by + libunwind. This callback is optional and may be set to + NULL. */ + int (*get_proc_name)(unw_addr_space_t, unw_word_t, char *, size_t, + unw_word_t *, void *); + + /* Optional call back to obtain the name of a elf file where the ip belongs + to. This callback is optional and may be set to NULL. */ + int (*get_elf_filename)(unw_addr_space_t, unw_word_t, char *, size_t, + unw_word_t *, void *); + + /* Optional call back to obtain the start and end ip of a procedure. + * procedure ip range is [start, end), the range is without end. + * This callback is optional and may be set to NULL. + */ + int (*get_proc_ip_range)(unw_addr_space_t, unw_word_t, unw_word_t *, + unw_word_t *, void *); + + /* Optional call back to return a mask to be used with pointer + * authentication on arm64. + * + * The on bits in the returned mask indicate which bits in a return address + * are part of a pointer authentication code. These are the bits in the + * return address to turn off so that the calling frame can be found + * for the unwinding to continue. + * + * The return value must be host-endian. e.g. if the target is big-endian + * and the host is little endian, the implementation of this function + * must byte swap. 
+ * + * This callback is optional and may be set to NULL. In this case all + * the bits in the return address are used, as if no masking were done. + */ + unw_word_t (*ptrauth_insn_mask)(unw_addr_space_t, void *); + +} unw_accessors_t; + +typedef enum unw_save_loc_type +{ + UNW_SLT_NONE, /* register is not saved ("not an l-value") */ + UNW_SLT_MEMORY, /* register has been saved in memory */ + UNW_SLT_REG /* register has been saved in (another) register */ +} unw_save_loc_type_t; + +typedef struct unw_save_loc +{ + unw_save_loc_type_t type; + union + { + unw_word_t addr; /* valid if type==UNW_SLT_MEMORY */ + unw_regnum_t regnum; /* valid if type==UNW_SLT_REG */ + } u; + unw_tdep_save_loc_t extra; /* target-dependent additional information */ +} unw_save_loc_t; + +struct dl_phdr_info; +typedef int (*unw_iterate_phdr_callback_t)(struct dl_phdr_info *, size_t, + void *); +typedef int (*unw_iterate_phdr_func_t)(unw_iterate_phdr_callback_t, void *); + +/* These routines work both for local and remote unwinding. 
*/ + +#define unw_local_addr_space UNW_OBJ(local_addr_space) +#define unw_create_addr_space UNW_OBJ(create_addr_space) +#define unw_destroy_addr_space UNW_OBJ(destroy_addr_space) +#define unw_get_accessors UNW_ARCH_OBJ(get_accessors) +#define unw_get_accessors_int UNW_ARCH_OBJ(get_accessors_int) +#define unw_init_local UNW_OBJ(init_local) +#define unw_init_local2 UNW_OBJ(init_local2) +#define unw_init_remote UNW_OBJ(init_remote) +#define unw_step UNW_OBJ(step) +#define unw_resume UNW_OBJ(resume) +#define unw_get_proc_info UNW_OBJ(get_proc_info) +#define unw_get_proc_info_by_ip UNW_OBJ(get_proc_info_by_ip) +#define unw_get_proc_info_in_range UNW_OBJ(get_proc_info_in_range) +#define unw_reg_states_iterate UNW_OBJ(reg_states_iterate) +#define unw_apply_reg_state UNW_OBJ(apply_reg_state) +#define unw_get_reg UNW_OBJ(get_reg) +#define unw_set_reg UNW_OBJ(set_reg) +#define unw_get_fpreg UNW_OBJ(get_fpreg) +#define unw_set_fpreg UNW_OBJ(set_fpreg) +#define unw_get_save_loc UNW_OBJ(get_save_loc) +#define unw_is_signal_frame UNW_OBJ(is_signal_frame) +#define unw_is_plt_entry UNW_OBJ(is_plt_entry) +#define unw_get_proc_name UNW_OBJ(get_proc_name) +#define unw_get_proc_name_by_ip UNW_OBJ(get_proc_name_by_ip) +#define unw_get_elf_filename UNW_OBJ(get_elf_filename) +#define unw_get_elf_filename_by_ip UNW_OBJ(get_elf_filename_by_ip) +#define unw_set_caching_policy UNW_OBJ(set_caching_policy) +#define unw_set_cache_size UNW_OBJ(set_cache_size) +#define unw_set_iterate_phdr_function UNW_OBJ(set_iterate_phdr_function) +#define unw_regname UNW_ARCH_OBJ(regname) +#define unw_flush_cache UNW_ARCH_OBJ(flush_cache) +#define unw_strerror UNW_ARCH_OBJ(strerror) + +extern unw_addr_space_t unw_create_addr_space(unw_accessors_t *, int); +extern void unw_destroy_addr_space(unw_addr_space_t); +extern unw_accessors_t *unw_get_accessors(unw_addr_space_t); +extern unw_accessors_t *unw_get_accessors_int(unw_addr_space_t); +extern void unw_flush_cache(unw_addr_space_t, unw_word_t, unw_word_t); 
+extern int unw_set_caching_policy(unw_addr_space_t, unw_caching_policy_t); +extern int unw_set_cache_size(unw_addr_space_t, size_t, int); +extern void unw_set_iterate_phdr_function(unw_addr_space_t, + unw_iterate_phdr_func_t); +extern const char *unw_regname(unw_regnum_t); + +extern int unw_init_local(unw_cursor_t *, unw_context_t *); +extern int unw_init_local2(unw_cursor_t *, unw_context_t *, int); +extern int unw_init_remote(unw_cursor_t *, unw_addr_space_t, void *); +extern int unw_step(unw_cursor_t *); +extern int unw_resume(unw_cursor_t *); +extern int unw_get_proc_info(unw_cursor_t *, unw_proc_info_t *); +extern int unw_get_proc_info_by_ip(unw_addr_space_t, unw_word_t, + unw_proc_info_t *, void *); +extern int unw_get_proc_info_in_range(unw_word_t, unw_word_t, unw_word_t, + unw_word_t, unw_word_t, unw_word_t, + unw_addr_space_t, unw_word_t, + unw_proc_info_t *, int, void *); +extern int unw_reg_states_iterate(unw_cursor_t *, unw_reg_states_callback, + void *); +extern int unw_apply_reg_state(unw_cursor_t *, void *); +extern int unw_get_reg(unw_cursor_t *, int, unw_word_t *); +extern int unw_set_reg(unw_cursor_t *, int, unw_word_t); +extern int unw_get_fpreg(unw_cursor_t *, int, unw_fpreg_t *); +extern int unw_set_fpreg(unw_cursor_t *, int, unw_fpreg_t); +extern int unw_get_save_loc(unw_cursor_t *, int, unw_save_loc_t *); +extern int unw_is_signal_frame(unw_cursor_t *); +extern int unw_is_plt_entry(unw_cursor_t *); +extern int unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *); +extern int unw_get_proc_name_by_ip(unw_addr_space_t, unw_word_t, char *, size_t, + unw_word_t *, void *); +extern int unw_get_elf_filename(unw_cursor_t *, char *, size_t, unw_word_t *); +extern int unw_get_elf_filename_by_ip(unw_addr_space_t, unw_word_t, char *, + size_t, unw_word_t *, void *); +extern const char *unw_strerror(int); +extern int unw_backtrace(void **, int); +extern int unw_backtrace2(void **, int, unw_context_t *, int); + +extern unw_addr_space_t 
unw_local_addr_space; diff --git a/systrace/thirdparty/aarch64/libunwind/libunwind-dynamic.h b/systrace/thirdparty/aarch64/libunwind/libunwind-dynamic.h new file mode 100644 index 0000000000000000000000000000000000000000..13caf1633631ccc33d6c90ace394c539dd03f124 --- /dev/null +++ b/systrace/thirdparty/aarch64/libunwind/libunwind-dynamic.h @@ -0,0 +1,201 @@ +/* libunwind - a platform-independent unwind library + Copyright (C) 2002-2004 Hewlett-Packard Co + Contributed by David Mosberger-Tang + +This file is part of libunwind. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/* This file defines the runtime-support routines for dynamically +generated code. Even though it is implemented as part of libunwind, +it is logically separate from the interface to perform the actual +unwinding. 
In particular, this interface is always used in the +context of the unwind target, whereas the rest of the unwind API is +used in context of the process that is doing the unwind (which may be +a debugger running on another machine, for example). + +Note that the data-structures declared here serve a dual purpose: +when a program registers a dynamically generated procedure, it uses +these structures directly. On the other hand, with remote-unwinding, +the data-structures are read from the remote process's memory and +translated into internalized versions. To facilitate remote-access, +the following rules should be followed in declaring these structures: + + (1) Declare a member as a pointer only if the information the + member points to needs to be internalized as well (e.g., a + string representing a procedure name should be declared as + "const char *", but the instruction pointer should be declared + as unw_word_t). + + (2) Provide sufficient padding to ensure that no implicit padding + will be needed on any of the supported target architectures. For + the time being, padding data structures with the assumption that + sizeof (unw_word_t) == 8 should be sufficient. (Note: it's not + impossible to internalize structures with internal padding, but + it does make the process a bit harder). + + (3) Don't declare members that contain bitfields or floating-point + values. + + (4) Don't declare members with enumeration types. Declare them as + int32_t instead.
*/ + +typedef enum +{ + UNW_DYN_STOP = 0, /* end-of-unwind-info marker */ + UNW_DYN_SAVE_REG, /* save register to another register */ + UNW_DYN_SPILL_FP_REL, /* frame-pointer-relative register spill */ + UNW_DYN_SPILL_SP_REL, /* stack-pointer-relative register spill */ + UNW_DYN_ADD, /* add constant value to a register */ + UNW_DYN_POP_FRAMES, /* drop one or more stack frames */ + UNW_DYN_LABEL_STATE, /* name the current state */ + UNW_DYN_COPY_STATE, /* set the region's entry-state */ + UNW_DYN_ALIAS /* get unwind info from an alias */ +} unw_dyn_operation_t; + +typedef enum +{ + UNW_INFO_FORMAT_DYNAMIC, /* unw_dyn_proc_info_t */ + UNW_INFO_FORMAT_TABLE, /* unw_dyn_table_t */ + UNW_INFO_FORMAT_REMOTE_TABLE, /* unw_dyn_remote_table_t */ + UNW_INFO_FORMAT_ARM_EXIDX, /* ARM specific unwind info */ + UNW_INFO_FORMAT_IP_OFFSET /* Like UNW_INFO_FORMAT_REMOTE_TABLE, but + table entries are considered + relative to di->start_ip, rather + than di->segbase */ +} unw_dyn_info_format_t; + +typedef struct unw_dyn_op +{ + int8_t tag; /* what operation? */ + int8_t qp; /* qualifying predicate register */ + int16_t reg; /* what register */ + int32_t when; /* when does it take effect? */ + unw_word_t val; /* auxiliary value */ +} unw_dyn_op_t; + +typedef struct unw_dyn_region_info +{ + struct unw_dyn_region_info *next; /* linked list of regions */ + int32_t insn_count; /* region length (# of instructions) */ + uint32_t op_count; /* length of op-array */ + unw_dyn_op_t op[1]; /* variable-length op-array */ +} unw_dyn_region_info_t; + +typedef struct unw_dyn_proc_info +{ + unw_word_t name_ptr; /* address of human-readable procedure name */ + unw_word_t handler; /* address of personality routine */ + uint32_t flags; + int32_t pad0; + unw_dyn_region_info_t *regions; +} unw_dyn_proc_info_t; + +typedef struct unw_dyn_table_info +{ + unw_word_t name_ptr; /* addr. 
of table name (e.g., library name) */ + unw_word_t segbase; /* segment base */ + unw_word_t table_len; /* must be a multiple of sizeof(unw_word_t)! */ + unw_word_t *table_data; +} unw_dyn_table_info_t; + +typedef struct unw_dyn_remote_table_info +{ + unw_word_t name_ptr; /* addr. of table name (e.g., library name) */ + unw_word_t segbase; /* segment base */ + unw_word_t table_len; /* must be a multiple of sizeof(unw_word_t)! */ + unw_word_t table_data; +} unw_dyn_remote_table_info_t; + +typedef struct unw_dyn_info +{ + /* doubly-linked list of dyn-info structures: */ + struct unw_dyn_info *next; + struct unw_dyn_info *prev; + unw_word_t start_ip; /* first IP covered by this entry */ + unw_word_t end_ip; /* first IP NOT covered by this entry */ + unw_word_t gp; /* global-pointer in effect for this entry */ + int32_t format; /* real type: unw_dyn_info_format_t */ + int32_t pad; + unw_word_t load_offset; /* ELF load offset */ + union + { + unw_dyn_proc_info_t pi; + unw_dyn_table_info_t ti; + unw_dyn_remote_table_info_t rti; + } u; +} unw_dyn_info_t; + +typedef struct unw_dyn_info_list +{ + uint32_t version; + uint32_t generation; + unw_dyn_info_t *first; +} unw_dyn_info_list_t; + +/* Return the size (in bytes) of an unw_dyn_region_info_t structure that can + hold OP_COUNT ops. */ +#define _U_dyn_region_info_size(op_count) \ + ((char *)(((unw_dyn_region_info_t *)NULL)->op + (op_count)) - (char *)NULL) + +/* Register the unwind info for a single procedure. + This routine is NOT signal-safe. */ +extern void _U_dyn_register(unw_dyn_info_t *); + +/* Cancel the unwind info for a single procedure. + This routine is NOT signal-safe. */ +extern void _U_dyn_cancel(unw_dyn_info_t *); + +/* Convenience routines. 
*/ + +#define _U_dyn_op(_tag, _qp, _when, _reg, _val) \ + ((unw_dyn_op_t){(_tag), (_qp), (_reg), (_when), (_val)}) + +#define _U_dyn_op_save_reg(op, qp, when, reg, dst) \ + (*(op) = _U_dyn_op(UNW_DYN_SAVE_REG, (qp), (when), (reg), (dst))) + +#define _U_dyn_op_spill_fp_rel(op, qp, when, reg, offset) \ + (*(op) = _U_dyn_op(UNW_DYN_SPILL_FP_REL, (qp), (when), (reg), (offset))) + +#define _U_dyn_op_spill_sp_rel(op, qp, when, reg, offset) \ + (*(op) = _U_dyn_op(UNW_DYN_SPILL_SP_REL, (qp), (when), (reg), (offset))) + +#define _U_dyn_op_add(op, qp, when, reg, value) \ + (*(op) = _U_dyn_op(UNW_DYN_ADD, (qp), (when), (reg), (value))) + +#define _U_dyn_op_pop_frames(op, qp, when, num_frames) \ + (*(op) = _U_dyn_op(UNW_DYN_POP_FRAMES, (qp), (when), 0, (num_frames))) + +#define _U_dyn_op_label_state(op, label) \ + (*(op) = _U_dyn_op(UNW_DYN_LABEL_STATE, _U_QP_TRUE, -1, 0, (label))) + +#define _U_dyn_op_copy_state(op, label) \ + (*(op) = _U_dyn_op(UNW_DYN_COPY_STATE, _U_QP_TRUE, -1, 0, (label))) + +#define _U_dyn_op_alias(op, qp, when, addr) \ + (*(op) = _U_dyn_op(UNW_DYN_ALIAS, (qp), (when), 0, (addr))) + +#define _U_dyn_op_stop(op) \ + (*(op) = _U_dyn_op(UNW_DYN_STOP, _U_QP_TRUE, -1, 0, 0)) + +/* The target-dependent qualifying predicate which is always TRUE. On + IA-64, that's p0 (0), on non-predicated architectures, the value is + ignored. */ +#define _U_QP_TRUE _U_TDEP_QP_TRUE diff --git a/systrace/thirdparty/aarch64/libunwind/libunwind.h b/systrace/thirdparty/aarch64/libunwind/libunwind.h new file mode 100644 index 0000000000000000000000000000000000000000..1624c7f5963663a56a6bcdbc4ba1ac2da31e4a73 --- /dev/null +++ b/systrace/thirdparty/aarch64/libunwind/libunwind.h @@ -0,0 +1,40 @@ +/* Provide a real file - not a symlink - as it would cause multiarch conflicts + when multiple different arch releases are installed simultaneously. 
*/ + +#ifndef UNW_REMOTE_ONLY + +#if defined __aarch64__ +#include "libunwind-aarch64.h" +#elif defined __arm__ +#include "libunwind-arm.h" +#elif defined __hppa__ +#include "libunwind-hppa.h" +#elif defined __ia64__ +#include "libunwind-ia64.h" +#elif defined __mips__ +#include "libunwind-mips.h" +#elif defined __powerpc__ && !defined __powerpc64__ +#include "libunwind-ppc32.h" +#elif defined __powerpc64__ +#include "libunwind-ppc64.h" +#elif defined __sh__ +#include "libunwind-sh.h" +#elif defined __i386__ +#include "libunwind-x86.h" +#elif defined __x86_64__ +#include "libunwind-x86_64.h" +#elif defined __s390x__ +#include "libunwind-s390x.h" +#elif defined __riscv || defined __riscv__ +#include "libunwind-riscv.h" +#elif defined __loongarch64 +#include "libunwind-loongarch64.h" +#else +#error "Unsupported arch" +#endif + +#else /* UNW_REMOTE_ONLY */ + +#include "libunwind-aarch64.h" + +#endif /* UNW_REMOTE_ONLY */ diff --git a/systrace/thirdparty/aarch64/libunwind/unwind.h b/systrace/thirdparty/aarch64/libunwind/unwind.h new file mode 100644 index 0000000000000000000000000000000000000000..69201dc8929eb8fcb5c63d059ca538c5fc4273a4 --- /dev/null +++ b/systrace/thirdparty/aarch64/libunwind/unwind.h @@ -0,0 +1,158 @@ +/* libunwind - a platform-independent unwind library + Copyright (C) 2003 Hewlett-Packard Co + Contributed by David Mosberger-Tang + +This file is part of libunwind. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef _UNWIND_H +#define _UNWIND_H + +/* For uint64_t */ +#include <stdint.h> +#include <stdalign.h> + +#ifdef __cplusplus +extern "C" +{ +#endif + + /* Minimal interface as per C++ ABI draft standard: + + http://www.codesourcery.com/cxx-abi/abi-eh.html */ + + typedef enum + { + _URC_NO_REASON = 0, + _URC_FOREIGN_EXCEPTION_CAUGHT = 1, + _URC_FATAL_PHASE2_ERROR = 2, + _URC_FATAL_PHASE1_ERROR = 3, + _URC_NORMAL_STOP = 4, + _URC_END_OF_STACK = 5, + _URC_HANDLER_FOUND = 6, + _URC_INSTALL_CONTEXT = 7, + _URC_CONTINUE_UNWIND = 8 + } _Unwind_Reason_Code; + + typedef int _Unwind_Action; + +#define _UA_SEARCH_PHASE 1 +#define _UA_CLEANUP_PHASE 2 +#define _UA_HANDLER_FRAME 4 +#define _UA_FORCE_UNWIND 8 + + struct _Unwind_Context; /* opaque data-structure */ + struct _Unwind_Exception; /* forward-declaration */ + + typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code, + struct _Unwind_Exception *); + + typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action, + uint64_t, + struct _Unwind_Exception *, + struct _Unwind_Context *, + void *); + + /* The C++ ABI requires exception_class, private_1, and private_2 to + be of type uint64 and the entire structure to be + double-word-aligned. Please note that exception_class stays 64-bit + even on 32-bit machines for gcc compatibility.
*/ + struct _Unwind_Exception + { + alignas(8) uint64_t exception_class; + _Unwind_Exception_Cleanup_Fn exception_cleanup; + unsigned long private_1; + unsigned long private_2; + }; + + extern _Unwind_Reason_Code + _Unwind_RaiseException(struct _Unwind_Exception *); + extern _Unwind_Reason_Code _Unwind_ForcedUnwind(struct _Unwind_Exception *, + _Unwind_Stop_Fn, void *); + extern void _Unwind_Resume(struct _Unwind_Exception *); + extern void _Unwind_DeleteException(struct _Unwind_Exception *); + extern unsigned long _Unwind_GetGR(struct _Unwind_Context *, int); + extern void _Unwind_SetGR(struct _Unwind_Context *, int, unsigned long); + extern unsigned long _Unwind_GetIP(struct _Unwind_Context *); + extern unsigned long _Unwind_GetIPInfo(struct _Unwind_Context *, int *); + extern void _Unwind_SetIP(struct _Unwind_Context *, unsigned long); + extern unsigned long + _Unwind_GetLanguageSpecificData(struct _Unwind_Context *); + extern unsigned long _Unwind_GetRegionStart(struct _Unwind_Context *); + +#ifdef _GNU_SOURCE + + /* Callback for _Unwind_Backtrace(). The backtrace stops immediately + if the callback returns any value other than _URC_NO_REASON. */ + typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *, + void *); + +/* See http://gcc.gnu.org/ml/gcc-patches/2001-09/msg00082.html for why + _UA_END_OF_STACK exists. */ +#define _UA_END_OF_STACK 16 + + /* If the unwind was initiated due to a forced unwind, resume that + operation, else re-raise the exception. This is used by + __cxa_rethrow(). */ + extern _Unwind_Reason_Code + _Unwind_Resume_or_Rethrow(struct _Unwind_Exception *); + + /* See http://gcc.gnu.org/ml/gcc-patches/2003-09/msg00154.html for why + _Unwind_GetBSP() exists. */ + extern unsigned long _Unwind_GetBSP(struct _Unwind_Context *); + + /* Return the "canonical frame address" for the given context. + This is used by NPTL... 
*/ + extern unsigned long _Unwind_GetCFA(struct _Unwind_Context *); + + /* Return the base-address for data references. */ + extern unsigned long _Unwind_GetDataRelBase(struct _Unwind_Context *); + + /* Return the base-address for text references. */ + extern unsigned long _Unwind_GetTextRelBase(struct _Unwind_Context *); + + /* Call _Unwind_Trace_Fn once for each stack-frame, without doing any + cleanup. The first frame for which the callback is invoked is the + one for the caller of _Unwind_Backtrace(). _Unwind_Backtrace() + returns _URC_END_OF_STACK when the backtrace stopped due to + reaching the end of the call-chain or _URC_FATAL_PHASE1_ERROR if it + stops for any other reason. */ + extern _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *); + + /* Find the start-address of the procedure containing the specified IP + or NULL if it cannot be found (e.g., because the function has no + unwind info). Note: there is not necessarily a one-to-one + correspondence between source-level functions and procedures: some + functions don't have unwind-info and others are split into multiple + procedures. */ + extern void *_Unwind_FindEnclosingFunction(void *); + + /* See also Linux Standard Base Spec: + http://www.linuxbase.org/spec/refspecs/LSB_1.3.0/gLSB/gLSB/libgcc-s.html + */ + +#endif /* _GNU_SOURCE */ + +#ifdef __cplusplus +}; +#endif + +#endif /* _UNWIND_H */ diff --git a/systrace/thirdparty/aarch64/mspti/include/mspti.h b/systrace/thirdparty/aarch64/mspti/include/mspti.h new file mode 100644 index 0000000000000000000000000000000000000000..e83c454c11cb784c7a22f82f50127f2f9d2a368c --- /dev/null +++ b/systrace/thirdparty/aarch64/mspti/include/mspti.h @@ -0,0 +1,19 @@ +/** + * @file mspti.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef MSPTI_H +#define MSPTI_H + +#include "mspti_activity.h" +#include "mspti_callback.h" +#include "mspti_cbid.h" +#include "mspti_result.h" + +#endif diff --git a/systrace/thirdparty/aarch64/mspti/include/mspti_activity.h b/systrace/thirdparty/aarch64/mspti/include/mspti_activity.h new file mode 100644 index 0000000000000000000000000000000000000000..30f71598d073b9637c9ec440939f30f65ef30e74 --- /dev/null +++ b/systrace/thirdparty/aarch64/mspti/include/mspti_activity.h @@ -0,0 +1,424 @@ +/** + * @file mspti_activity.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef MSPTI_ACTIVITY_H +#define MSPTI_ACTIVITY_H + +#define ACTIVITY_STRUCT_ALIGNMENT 8 +#if defined(_WIN32) +#define START_PACKED_ALIGNMENT __pragma(pack(push, 1)) +#define PACKED_ALIGNMENT __declspec(align(ACTIVITY_STRUCT_ALIGNMENT)) +#define END_PACKED_ALIGNMENT __pragma(pack(pop)) +#elif defined(__GNUC__) +#define START_PACKED_ALIGNMENT +#define PACKED_ALIGNMENT \ + __attribute__((__packed__)) \ + __attribute__((aligned(ACTIVITY_STRUCT_ALIGNMENT))) +#define END_PACKED_ALIGNMENT +#else +#define START_PACKED_ALIGNMENT +#define PACKED_ALIGNMENT +#define END_PACKED_ALIGNMENT +#endif + +#include "mspti_result.h" +#include <stddef.h> +#include <stdint.h> + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#if defined(__GNUC__) && defined(MSPTI_LIB) +#pragma GCC visibility push(default) +#endif + + /** + * @brief The kinds of activity records. + * + * Each kind is associated with an + * activity record structure that holds the information associated + * with the kind.
+ */ + typedef enum + { + /** + * The activity record is invalid. + */ + MSPTI_ACTIVITY_KIND_INVALID = 0, + MSPTI_ACTIVITY_KIND_MARKER = 1, + MSPTI_ACTIVITY_KIND_KERNEL = 2, + MSPTI_ACTIVITY_KIND_API = 3, + MSPTI_ACTIVITY_KIND_COUNT, + MSPTI_ACTIVITY_KIND_FORCE_INT = 0x7fffffff + } msptiActivityKind; + + /** + * @brief The source kinds of mark data. + * + * Each mark activity record kind represents information about host or + * device + */ + typedef enum + { + MSPTI_ACTIVITY_SOURCE_KIND_HOST = 0, + MSPTI_ACTIVITY_SOURCE_KIND_DEVICE = 1 + } msptiActivitySourceKind; + + /** + * @brief Flags linked to activity records. + * + * These are the Flags that pertain to activity records. + * Flags can be combined by bitwise OR to + * associate multiple flags with an activity record. + */ + typedef enum + { + /** + * Signifies that the activity record lacks any flags. + */ + MSPTI_ACTIVITY_FLAG_NONE = 0, + /** + * Represents the activity as a pure host instantaneous marker. Works + * with MSPTI_ACTIVITY_KIND_MARKER. + */ + MSPTI_ACTIVITY_FLAG_MARKER_INSTANTANEOUS = 1 << 0, + /** + * Represents the activity as a pure host region start marker. Works + * with MSPTI_ACTIVITY_KIND_MARKER. + */ + MSPTI_ACTIVITY_FLAG_MARKER_START = 1 << 1, + /** + * Represents the activity as a pure host region end marker. Works with + * MSPTI_ACTIVITY_KIND_MARKER. + */ + MSPTI_ACTIVITY_FLAG_MARKER_END = 1 << 2, + /** + * Represents the activity as an instantaneous marker with device. Works + * with MSPTI_ACTIVITY_KIND_MARKER. + */ + MSPTI_ACTIVITY_FLAG_MARKER_INSTANTANEOUS_WITH_DEVICE = 1 << 3, + /** + * Represents the activity as a pure start marker with device. Works + * with MSPTI_ACTIVITY_KIND_MARKER. + */ + MSPTI_ACTIVITY_FLAG_MARKER_START_WITH_DEVICE = 1 << 4, + /** + * Represents the activity as a pure end marker with device. Works with + * MSPTI_ACTIVITY_KIND_MARKER.
+ */ + MSPTI_ACTIVITY_FLAG_MARKER_END_WITH_DEVICE = 1 << 5 + } msptiActivityFlag; + + START_PACKED_ALIGNMENT + + typedef struct PACKED_ALIGNMENT + { + msptiActivityKind kind; + } msptiActivity; + + typedef union PACKED_ALIGNMENT + { + /** + * A thread object requires that we identify both the process and + * thread ID. + */ + struct + { + uint32_t processId; + uint32_t threadId; + } pt; + /** + * A stream object requires that we identify device and stream ID. + */ + struct + { + uint32_t deviceId; + uint32_t streamId; + } ds; + } msptiObjectId; + + /** + * @brief This activity record serves as a marker, representing a specific + * moment in time. + * + * The marker is characterized by a distinctive name and a unique identifier + */ + typedef struct PACKED_ALIGNMENT + { + /** + * The activity record kind, always be MSPTI_ACTIVITY_KIND_MARKER. + */ + msptiActivityKind kind; + + /** + * The flags associated with the marker. + * @see msptiActivityFlag + */ + msptiActivityFlag flag; + + /** + * The source kinds of mark data. + * @see msptiActivitySourceKind + */ + msptiActivitySourceKind sourceKind; + + /** + * The timestamp for the marker, in ns. A value of 0 indicates that + * timestamp information could not be collected for the marker. + */ + uint64_t timestamp; + + /** + * The marker ID. + */ + uint64_t id; + + /** + * The identifier for the activity object associated with this + * marker. 'objectKind' indicates which ID is valid for this record. + */ + msptiObjectId objectId; + + /** + * The marker name for an instantaneous or start marker. + * This will be NULL for an end marker. + */ + const char *name; + + /** + * The name of the domain to which this marker belongs to. + * This will be NULL for default domain. + */ + const char *domain; + } msptiActivityMarker; + + typedef struct PACKED_ALIGNMENT + { + /** + * The activity record kind, must be MSPTI_ACTIVITY_KIND_API. + */ + msptiActivityKind kind; + + /** + * The start timestamp for the api, in ns. 
+ */ + uint64_t start; + + /** + * The end timestamp for the api, in ns. + */ + uint64_t end; + + /** + * A thread object requires that we identify both the process and + * thread ID. + */ + struct + { + uint32_t processId; + uint32_t threadId; + } pt; + + /** + * The correlation ID of the kernel. + */ + uint64_t correlationId; + + /** + * The api name. + */ + const char *name; + } msptiActivityApi; + + typedef struct PACKED_ALIGNMENT + { + /** + * The activity record kind, must be MSPTI_ACTIVITY_KIND_KERNEL. + */ + msptiActivityKind kind; + + /** + * The start timestamp for the kernel, in ns. + */ + uint64_t start; + + /** + * The end timestamp for the kernel, in ns. + */ + uint64_t end; + + /** + * A stream object requires that we identify device and stream ID. + */ + struct + { + uint32_t deviceId; + uint32_t streamId; + } ds; + + /** + * The correlation ID of the kernel. + */ + uint64_t correlationId; + + /** + * The kernel type. + */ + const char *type; + + /** + * The kernel name. + */ + const char *name; + } msptiActivityKernel; + + END_PACKED_ALIGNMENT + + /** + * @brief Function type for callback used by MSPTI to request an empty + * buffer for storing activity records. + * + * This callback function signals the MSPTI client that an activity + * buffer is needed by MSPTI. The activity buffer is used by MSPTI to + * store activity records. The callback function can decline the + * request by setting **buffer to NULL. In this case MSPTI may drop + * activity records. + * + * @param buffer Returns the new buffer. If set to NULL then no buffer + * is returned. + * @param size Returns the size of the returned buffer. + * @param maxNumRecords Returns the maximum number of records that + * should be placed in the buffer. If 0 then the buffer is filled with + * as many records as possible. If > 0 the buffer is filled with at + * most that many records before it is returned. 
+ */ + typedef void (*msptiBuffersCallbackRequestFunc)(uint8_t **buffer, + size_t *size, + size_t *maxNumRecords); + + /** + * @brief Function type for callback used by MSPTI to return a buffer + * of activity records. + * + * This callback function returns to the MSPTI client a buffer + * containing activity records. The buffer contains @p validSize + * bytes of activity records which should be read using + * msptiActivityGetNextRecord. After this call MSPTI + * relinquished ownership of the buffer and will not use it + * anymore. The client may return the buffer to MSPTI using the + * msptiBuffersCallbackRequestFunc callback. + * + * @param buffer The activity record buffer. + * @param size The total size of the buffer in bytes as set in + * MSPTI_BuffersCallbackRequestFunc. + * @param validSize The number of valid bytes in the buffer. + */ + typedef void (*msptiBuffersCallbackCompleteFunc)(uint8_t *buffer, + size_t size, + size_t validSize); + + /** + * @brief Registers callback functions with MSPTI for activity buffer + * handling. + * + * This function registers two callback functions to be used in asynchronous + * buffer handling. If registered, activity record buffers are handled using + * asynchronous requested/completed callbacks from MSPTI. + * + * @param funcBufferRequested callback which is invoked when an empty + * buffer is requested by MSPTI + * @param funcBufferCompleted callback which is invoked when a buffer + * containing activity records is available from MSPTI + * + * @retval MSPTI_SUCCESS + * @retval MSPTI_ERROR_INVALID_PARAMETER if either + * funcBufferRequested or funcBufferCompleted is NULL + */ + msptiResult msptiActivityRegisterCallbacks( + msptiBuffersCallbackRequestFunc funcBufferRequested, + msptiBuffersCallbackCompleteFunc funcBufferCompleted); + + /** + * @brief Enable collection of a specific kind of activity record. + * + * Enable collection of a specific kind of activity record. 
Multiple + * kinds can be enabled by calling this function multiple times. + * By default, the collection of all activity types is inactive. + * + * @param kind The kind of activity record to collect + * + * @retval MSPTI_SUCCESS + */ + msptiResult msptiActivityEnable(msptiActivityKind kind); + + /** + * @brief Disable collection of a specific kind of activity record. + * + * Disable collection of a specific kind of activity record. Multiple + * kinds can be disabled by calling this function multiple times. + * By default, the collection of all activity types is inactive. + * + * @param kind The kind of activity record to stop collecting + * + * @retval MSPTI_SUCCESS + */ + msptiResult msptiActivityDisable(msptiActivityKind kind); + + /** + * @brief Iterate over the activity records in a buffer. + * + * This is a function to iterate over the activity records in buffer. + * + * @param buffer The buffer containing activity records + * @param validBufferSizeBytes The number of valid bytes in the buffer. + * @param record Inputs the previous record returned by + * msptiActivityGetNextRecord and returns the next activity record + * from the buffer. If input value is NULL, returns the first activity + * record in the buffer. + * + * @retval MSPTI_SUCCESS + * @retval MSPTI_ERROR_MAX_LIMIT_REACHED if no more records in the buffer + * @retval MSPTI_ERROR_INVALID_PARAMETER if buffer is NULL. + */ + msptiResult msptiActivityGetNextRecord(uint8_t *buffer, + size_t validBufferSizeBytes, + msptiActivity **record); + + /** + * @brief Request to deliver activity records via the buffer completion + * callback. + * + * This function returns the activity records associated with all + * contexts/streams (and the global buffers not associated with any stream) + * to the MSPTI client using the callback registered in + * msptiActivityRegisterCallbacks. It returns all activity buffers that + * contain completed activity records, even if these buffers are not + * completely filled.
+ * + * Before calling this function, the buffer handling callback api must be + * activated by calling msptiActivityRegisterCallbacks. + * + * @param flag Reserved for internal use. + * + * @retval MSPTI_SUCCESS + */ + msptiResult msptiActivityFlushAll(uint32_t flag); + +#if defined(__GNUC__) && defined(MSPTI_LIB) +#pragma GCC visibility pop +#endif + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/systrace/thirdparty/aarch64/mspti/include/mspti_callback.h b/systrace/thirdparty/aarch64/mspti/include/mspti_callback.h new file mode 100644 index 0000000000000000000000000000000000000000..2e6f7ee2264b9e99f5f891fdc6ac3cd20d53bf66 --- /dev/null +++ b/systrace/thirdparty/aarch64/mspti/include/mspti_callback.h @@ -0,0 +1,258 @@ +/** + * @file mspti_callback.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef MSPTI_CALLBACK_H +#define MSPTI_CALLBACK_H + +#include "mspti_cbid.h" +#include "mspti_result.h" +#include <stdint.h> + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#if defined(__GNUC__) && defined(MSPTI_LIB) +#pragma GCC visibility push(default) +#endif + + /** + * @brief Callback domains. + * + * Callback domains. Each domain represents callback points for a + * group of related API functions or CANN driver activity. + */ + typedef enum + { + /** + * Invalid domain. + */ + MSPTI_CB_DOMAIN_INVALID = 0, + /** + * Domain containing callback points for all runtime API functions. + */ + MSPTI_CB_DOMAIN_RUNTIME = 1, + MSPTI_CB_DOMAIN_HCCL = 2, + MSPTI_CB_DOMAIN_SIZE, + MSPTI_CB_DOMAIN_FORCE_INT = 0x7fffffff + } msptiCallbackDomain; + + typedef uint32_t msptiCallbackId; + + /** + * @brief Specifies the point in an API call that a callback is issued. + * + * Specifies the point in an API call that a callback is issued.
This + * value is communicated to the callback function by @ref + * msptiCallbackData::callbackSite. + */ + typedef enum + { + /** + * The callback is at the entry of the API call. + */ + MSPTI_API_ENTER = 0, + /** + * The callback is at the exit of the API call. + */ + MSPTI_API_EXIT = 1, + MSPTI_API_CBSITE_FORCE_INT = 0x7fffffff + } msptiApiCallbackSite; + + typedef struct + { + /** + * Point in the runtime or driver function from where the callback + * was issued. + */ + msptiApiCallbackSite callbackSite; + + /** + * Name of the runtime or driver API function which issued the + * callback. + */ + const char *functionName; + + /** + * Params of the runtime or driver API function which issued the + * callback. + */ + const void *functionParams; + + /** + * Pointer to the return value of the runtime or driver API + * call. + */ + const void *functionReturnValue; + + /** + * Name of the symbol operated on by the runtime or driver API + * function which issued the callback. This entry is valid only for + * driver and runtime launch callbacks, where it returns the name of + * the kernel. + */ + const char *symbolName; + + /** + * The activity record correlation ID for this callback. For a + * driver domain callback (i.e. @p domain + * MSPTI_CB_DOMAIN_DRIVER_API) this ID will equal the correlation ID + * in the MSPTI_ActivityAPI record corresponding to the CANN driver + * function call. For a runtime domain callback (i.e. @p domain + * MSPTI_CB_DOMAIN_RUNTIME_API) this ID will equal the correlation + * ID in the MSPTI_ActivityAPI record corresponding to the CANN + * runtime function call. Within the callback, this ID can be + * recorded to correlate user data with the activity record. + */ + uint64_t correlationId; + + /** + * Undefined. Reserved for internal use. + */ + uint64_t reserved1; + + /** + * Undefined. Reserved for internal use. 
+ */ + uint64_t reserved2; + + /** + * Pointer to data shared between the entry and exit callbacks of + * a given runtime or driver API function invocation. This field + * can be used to pass 64-bit values from the entry callback to + * the corresponding exit callback. + */ + uint64_t *correlationData; + } msptiCallbackData; + + /** + * @brief Function type for a callback. + * + * Function type for a callback. The type of the data passed to the + * callback in @p cbdata depends on the @p domain. If @p domain is + * MSPTI_CB_DOMAIN_RUNTIME the type + * of @p cbdata will be msptiCallbackData. + * + * @param userdata User data supplied at subscription of the callback + * @param domain The domain of the callback + * @param cbid The ID of the callback + * @param cbdata Data passed to the callback. + */ + typedef void (*msptiCallbackFunc)(void *userdata, + msptiCallbackDomain domain, + msptiCallbackId cbid, + const msptiCallbackData *cbdata); + + struct msptiSubscriber_st; + + /** + * @brief A callback subscriber. + */ + typedef struct msptiSubscriber_st *msptiSubscriberHandle; + + /** + * @brief Initialize a callback subscriber with a callback function + * and user data. + * + * Initializes a callback subscriber with a callback function and + * (optionally) a pointer to user data. The returned subscriber handle + * can be used to enable and disable the callback for specific domains + * and callback IDs. + * @note Only a single subscriber can be registered at a time. To ensure + * that no other MSPTI client interrupts the profiling session, it's the + * responsibility of all the MSPTI clients to call this function before + * starting the profiling session. + * @note This function does not enable any callbacks. + * @note @b Thread-safety: this function is thread safe. + * + * @param subscriber handle to initialize subscriber + * @param callback The callback function + * @param userdata A pointer to user data. 
This data will be passed to + * the callback function via the @p userdata parameter. + * + * @retval MSPTI_SUCCESS on success + * @retval MSPTI_ERROR_INNER if unable to initialize MSPTI + * @retval MSPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED if there is + * already a MSPTI subscriber + * @retval MSPTI_ERROR_INVALID_PARAMETER if @p subscriber is NULL + */ + msptiResult msptiSubscribe(msptiSubscriberHandle *subscriber, + msptiCallbackFunc callback, void *userdata); + + /** + * @brief Unregister a callback subscriber. + * + * Removes a callback subscriber so that no future callbacks will be + * issued to that subscriber. + * + * @param subscriber Handle to the initialized subscriber + * + * @retval MSPTI_SUCCESS on success + * @retval MSPTI_ERROR_INVALID_PARAMETER if @p subscriber is NULL or not + * initialized + */ + msptiResult msptiUnsubscribe(msptiSubscriberHandle subscriber); + + /** + * @brief Enable or disable callbacks for a specific domain and + * callback ID. + * + * Enable or disable callbacks for a subscriber for a specific domain + * and callback ID. + * + * @note @b Thread-safety: a subscriber must serialize access to + * msptiEnableCallback, msptiEnableDomain. + * + * @param enable New enable state for the callback. Zero disables the + * callback, non-zero enables the callback. + * @param subscriber Handle to callback subscription + * @param domain The domain of the callback + * @param cbid The ID of the callback + * + * @retval MSPTI_SUCCESS on success + * @retval MSPTI_ERROR_INVALID_PARAMETER if @p subscriber, @p domain or @p + * cbid is invalid. + */ + msptiResult msptiEnableCallback(uint32_t enable, + msptiSubscriberHandle subscriber, + msptiCallbackDomain domain, + msptiCallbackId cbid); + + /** + * @brief Enable or disable callbacks for a specific domain + * + * Enable or disable callbacks for a subscriber for a specific domain + * + * @note @b Thread-safety: a subscriber must serialize access to + * msptiEnableCallback, msptiEnableDomain. 
+ * + * @param enable New enable state for the callback. Zero disables the + * callback, non-zero enables the callback. + * @param subscriber Handle to callback subscription + * @param domain The domain of the callback + * + * @retval MSPTI_SUCCESS on success + * @retval MSPTI_ERROR_INVALID_PARAMETER if @p subscriber, @p domain is + * invalid. + */ + msptiResult msptiEnableDomain(uint32_t enable, + msptiSubscriberHandle subscriber, + msptiCallbackDomain domain); + +#if defined(__GNUC__) && defined(MSPTI_LIB) +#pragma GCC visibility pop +#endif + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/systrace/thirdparty/aarch64/mspti/include/mspti_cbid.h b/systrace/thirdparty/aarch64/mspti/include/mspti_cbid.h new file mode 100644 index 0000000000000000000000000000000000000000..540ad394376e5a9f6bb74fb0a53c9072a24b1a9c --- /dev/null +++ b/systrace/thirdparty/aarch64/mspti/include/mspti_cbid.h @@ -0,0 +1,83 @@ +/** + * @file mspti_cbid.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#ifndef MSPTI_CBID_H +#define MSPTI_CBID_H + +/** + * @brief Definitions of indices for Runtime API functions, unique across entire + * API + */ +typedef enum +{ + MSPTI_CBID_RUNTIME_INVALID = 0, + MSPTI_CBID_RUNTIME_DEVICE_SET = 1, + MSPTI_CBID_RUNTIME_DEVICE_RESET = 2, + MSPTI_CBID_RUNTIME_DEVICE_SET_EX = 3, + MSPTI_CBID_RUNTIME_CONTEXT_CREATED_EX = 4, + MSPTI_CBID_RUNTIME_CONTEXT_CREATED = 5, + MSPTI_CBID_RUNTIME_CONTEXT_DESTROY = 6, + MSPTI_CBID_RUNTIME_STREAM_CREATED = 7, + MSPTI_CBID_RUNTIME_STREAM_DESTROY = 8, + MSPTI_CBID_RUNTIME_STREAM_SYNCHRONIZED = 9, + MSPTI_CBID_RUNTIME_LAUNCH = 10, + MSPTI_CBID_RUNTIME_CPU_LAUNCH = 11, + MSPTI_CBID_RUNTIME_AICPU_LAUNCH = 12, + MSPTI_CBID_RUNTIME_AIV_LAUNCH = 13, + MSPTI_CBID_RUNTIME_FFTS_LAUNCH = 14, + MSPTI_CBID_RUNTIME_MALLOC = 15, + MSPTI_CBID_RUNTIME_FREE = 16, + MSPTI_CBID_RUNTIME_MALLOC_HOST = 17, + MSPTI_CBID_RUNTIME_FREE_HOST = 18, + MSPTI_CBID_RUNTIME_MALLOC_CACHED = 19, + MSPTI_CBID_RUNTIME_FLUSH_CACHE = 20, + MSPTI_CBID_RUNTIME_INVALID_CACHE = 21, + MSPTI_CBID_RUNTIME_MEMCPY = 22, + MSPTI_CBID_RUNTIME_MEMCPY_HOST = 23, + MSPTI_CBID_RUNTIME_MEMCPY_ASYNC = 24, + MSPTI_CBID_RUNTIME_MEM_CPY2D = 25, + MSPTI_CBID_RUNTIME_MEM_CPY2D_ASYNC = 26, + MSPTI_CBID_RUNTIME_MEM_SET = 27, + MSPTI_CBID_RUNTIME_MEM_SET_ASYNC = 28, + MSPTI_CBID_RUNTIME_MEM_GET_INFO = 29, + MSPTI_CBID_RUNTIME_RESERVE_MEM_ADDRESS = 30, + MSPTI_CBID_RUNTIME_RELEASE_MEM_ADDRESS = 31, + MSPTI_CBID_RUNTIME_MALLOC_PHYSICAL = 32, + MSPTI_CBID_RUNTIME_FREE_PHYSICAL = 33, + MSPTI_CBID_RUNTIME_MEM_EXPORT_TO_SHAREABLE_HANDLE = 34, + MSPTI_CBID_RUNTIME_MEM_IMPORT_FROM_SHAREABLE_HANDLE = 35, + MSPTI_CBID_RUNTIME_MEM_SET_PID_TO_SHAREABLE_HANDLE = 36, + MSPTI_CBID_RUNTIME_SIZE, + MSPTI_CBID_RUNTIME_FORCE_INT = 0x7fffffff +} msptiCallbackIdRuntime; + +/** + * @brief Definitions of indices for hccl API functions + */ +typedef enum +{ + MSPTI_CBID_HCCL_INVALID = 0, + MSPTI_CBID_HCCL_ALLREDUCE = 1, + MSPTI_CBID_HCCL_BROADCAST = 2, + 
MSPTI_CBID_HCCL_ALLGATHER = 3, + MSPTI_CBID_HCCL_REDUCE_SCATTER = 4, + MSPTI_CBID_HCCL_REDUCE = 5, + MSPTI_CBID_HCCL_ALL_TO_ALL = 6, + MSPTI_CBID_HCCL_ALL_TO_ALLV = 7, + MSPTI_CBID_HCCL_BARRIER = 8, + MSPTI_CBID_HCCL_SCATTER = 9, + MSPTI_CBID_HCCL_SEND = 10, + MSPTI_CBID_HCCL_RECV = 11, + MSPTI_CBID_HCCL_SENDRECV = 12, + MSPTI_CBID_HCCL_SIZE, + MSPTI_CBID_HCCL_FORCE_INT = 0x7fffffff +} msptiCallbackIdHccl; + +#endif diff --git a/systrace/thirdparty/aarch64/mspti/include/mspti_result.h b/systrace/thirdparty/aarch64/mspti/include/mspti_result.h new file mode 100644 index 0000000000000000000000000000000000000000..902647eed2e5efc7b69f2d2dd865e228d4a22d0e --- /dev/null +++ b/systrace/thirdparty/aarch64/mspti/include/mspti_result.h @@ -0,0 +1,30 @@ +/** + * @file mspti_result.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef MSPTI_BASE_H +#define MSPTI_BASE_H + +/** + * @brief MSPTI result codes. + * + * Error and result codes returned by MSPTI functions. 
+ */ +typedef enum +{ + MSPTI_SUCCESS = 0, + MSPTI_ERROR_INVALID_PARAMETER = 1, + MSPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED = 2, + MSPTI_ERROR_MAX_LIMIT_REACHED = 3, + MSPTI_ERROR_DEVICE_OFFLINE = 4, + MSPTI_ERROR_INNER = 999, + MSPTI_ERROR_FOECE_INT = 0x7fffffff +} msptiResult; + +#endif diff --git a/systrace/thirdparty/aarch64/mspti/lib64/libmspti.so b/systrace/thirdparty/aarch64/mspti/lib64/libmspti.so new file mode 100644 index 0000000000000000000000000000000000000000..c6bc165910ce21933220f48149ef4f3ba240b8dd Binary files /dev/null and b/systrace/thirdparty/aarch64/mspti/lib64/libmspti.so differ diff --git a/systrace/thirdparty/uthash.h b/systrace/thirdparty/uthash.h new file mode 100644 index 0000000000000000000000000000000000000000..6d892006a8fca1a00848bb1426d0460cb060b0b5 --- /dev/null +++ b/systrace/thirdparty/uthash.h @@ -0,0 +1,1417 @@ +/* +Copyright (c) 2003-2025, Troy D. Hanson https://troydhanson.github.io/uthash/ +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef UTHASH_H +#define UTHASH_H + +#define UTHASH_VERSION 2.3.0 + +#include <stddef.h> /* ptrdiff_t */ +#include <stdlib.h> /* exit */ +#include <string.h> /* memcmp, memset, strlen */ + +#if defined(HASH_NO_STDINT) && HASH_NO_STDINT +/* The user doesn't have <stdint.h>, and must figure out their own way + to provide definitions for uint8_t and uint32_t. */ +#else +#include <stdint.h> /* uint8_t, uint32_t */ +#endif + +/* These macros use decltype or the earlier __typeof GNU extension. + As decltype is only available in newer compilers (VS2010 or gcc 4.3+ + when compiling c++ source) this code uses whatever method is needed + or, for VS2008 where neither is available, uses casting workarounds. 
*/ +#if !defined(DECLTYPE) && !defined(NO_DECLTYPE) +#if defined(_MSC_VER) /* MS compiler */ +#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ +#define DECLTYPE(x) (decltype(x)) +#else /* VS2008 or older (or VS2010 in C mode) */ +#define NO_DECLTYPE +#endif +#elif defined(__MCST__) /* Elbrus C Compiler */ +#define DECLTYPE(x) (__typeof(x)) +#elif defined(__BORLANDC__) || defined(__ICCARM__) || defined(__LCC__) || \ + defined(__WATCOMC__) +#define NO_DECLTYPE +#else /* GNU, Sun and other compilers */ +#define DECLTYPE(x) (__typeof(x)) +#endif +#endif + +#ifdef NO_DECLTYPE +#define DECLTYPE(x) +#define DECLTYPE_ASSIGN(dst, src) \ + do \ + { \ + char **_da_dst = (char **)(&(dst)); \ + *_da_dst = (char *)(src); \ + } while (0) +#else +#define DECLTYPE_ASSIGN(dst, src) \ + do \ + { \ + (dst) = DECLTYPE(dst)(src); \ + } while (0) +#endif + +#ifndef uthash_malloc +#define uthash_malloc(sz) malloc(sz) /* malloc fcn */ +#endif +#ifndef uthash_free +#define uthash_free(ptr, sz) free(ptr) /* free fcn */ +#endif +#ifndef uthash_bzero +#define uthash_bzero(a, n) memset(a, '\0', n) +#endif +#ifndef uthash_strlen +#define uthash_strlen(s) strlen(s) +#endif + +#ifndef HASH_FUNCTION +#define HASH_FUNCTION(keyptr, keylen, hashv) HASH_JEN(keyptr, keylen, hashv) +#endif + +#ifndef HASH_KEYCMP +#define HASH_KEYCMP(a, b, n) memcmp(a, b, n) +#endif + +#ifndef uthash_noexpand_fyi +#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ +#endif +#ifndef uthash_expand_fyi +#define uthash_expand_fyi(tbl) /* can be defined to log expands */ +#endif + +#ifndef HASH_NONFATAL_OOM +#define HASH_NONFATAL_OOM 0 +#endif + +#if HASH_NONFATAL_OOM +/* malloc failures can be recovered from */ + +#ifndef uthash_nonfatal_oom +#define uthash_nonfatal_oom(obj) \ + do \ + { \ + } while (0) /* non-fatal OOM error */ +#endif + +#define HASH_RECORD_OOM(oomed) \ + do \ + { \ + (oomed) = 1; \ + } while (0) +#define IF_HASH_NONFATAL_OOM(x) x + +#else +/* malloc failures 
result in lost memory, hash tables are unusable */ + +#ifndef uthash_fatal +#define uthash_fatal(msg) exit(-1) /* fatal OOM error */ +#endif + +#define HASH_RECORD_OOM(oomed) uthash_fatal("out of memory") +#define IF_HASH_NONFATAL_OOM(x) + +#endif + +/* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS_LOG2 \ + 5U /* lg2 of initial number of buckets \ + */ +#define HASH_BKT_CAPACITY_THRESH 10U /* expand when bucket count reaches */ + +/* calculate the element whose hash handle address is hhp */ +#define ELMT_FROM_HH(tbl, hhp) ((void *)(((char *)(hhp)) - ((tbl)->hho))) +/* calculate the hash handle from element address elp */ +#define HH_FROM_ELMT(tbl, elp) \ + ((UT_hash_handle *)(void *)(((char *)(elp)) + ((tbl)->hho))) + +#define HASH_ROLLBACK_BKT(hh, head, itemptrhh) \ + do \ + { \ + struct UT_hash_handle *_hd_hh_item = (itemptrhh); \ + unsigned _hd_bkt; \ + HASH_TO_BKT(_hd_hh_item->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + (head)->hh.tbl->buckets[_hd_bkt].count++; \ + _hd_hh_item->hh_next = NULL; \ + _hd_hh_item->hh_prev = NULL; \ + } while (0) + +#define HASH_VALUE(keyptr, keylen, hashv) \ + do \ + { \ + HASH_FUNCTION(keyptr, keylen, hashv); \ + } while (0) + +#define HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, hashval, out) \ + do \ + { \ + (out) = NULL; \ + if (head) \ + { \ + unsigned _hf_bkt; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt); \ + if (HASH_BLOOM_TEST((head)->hh.tbl, hashval)) \ + { \ + HASH_FIND_IN_BKT((head)->hh.tbl, hh, \ + (head)->hh.tbl->buckets[_hf_bkt], keyptr, \ + keylen, hashval, out); \ + } \ + } \ + } while (0) + +#define HASH_FIND(hh, head, keyptr, keylen, out) \ + do \ + { \ + (out) = NULL; \ + if (head) \ + { \ + unsigned _hf_hashv; \ + HASH_VALUE(keyptr, keylen, _hf_hashv); \ + HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out); \ + } \ + } while (0) + +#ifdef HASH_BLOOM +#define HASH_BLOOM_BITLEN (1UL << 
HASH_BLOOM) +#define HASH_BLOOM_BYTELEN \ + (HASH_BLOOM_BITLEN / 8UL) + (((HASH_BLOOM_BITLEN % 8UL) != 0UL) ? 1UL : 0UL) +#define HASH_BLOOM_MAKE(tbl, oomed) \ + do \ + { \ + (tbl)->bloom_nbits = HASH_BLOOM; \ + (tbl)->bloom_bv = (uint8_t *)uthash_malloc(HASH_BLOOM_BYTELEN); \ + if (!(tbl)->bloom_bv) \ + { \ + HASH_RECORD_OOM(oomed); \ + } \ + else \ + { \ + uthash_bzero((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ + (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ + } \ + } while (0) + +#define HASH_BLOOM_FREE(tbl) \ + do \ + { \ + uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ + } while (0) + +#define HASH_BLOOM_BITSET(bv, idx) (bv[(idx) / 8U] |= (1U << ((idx) % 8U))) +#define HASH_BLOOM_BITTEST(bv, idx) \ + ((bv[(idx) / 8U] & (1U << ((idx) % 8U))) != 0) + +#define HASH_BLOOM_ADD(tbl, hashv) \ + HASH_BLOOM_BITSET( \ + (tbl)->bloom_bv, \ + ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) + +#define HASH_BLOOM_TEST(tbl, hashv) \ + HASH_BLOOM_BITTEST( \ + (tbl)->bloom_bv, \ + ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) + +#else +#define HASH_BLOOM_MAKE(tbl, oomed) +#define HASH_BLOOM_FREE(tbl) +#define HASH_BLOOM_ADD(tbl, hashv) +#define HASH_BLOOM_TEST(tbl, hashv) 1 +#define HASH_BLOOM_BYTELEN 0U +#endif + +#define HASH_MAKE_TABLE(hh, head, oomed) \ + do \ + { \ + (head)->hh.tbl = \ + (UT_hash_table *)uthash_malloc(sizeof(UT_hash_table)); \ + if (!(head)->hh.tbl) \ + { \ + HASH_RECORD_OOM(oomed); \ + } \ + else \ + { \ + uthash_bzero((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head)->hh.tbl->tail = &((head)->hh); \ + (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ + (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ + (head)->hh.tbl->hho = (char *)(&(head)->hh) - (char *)(head); \ + (head)->hh.tbl->buckets = (UT_hash_bucket *)uthash_malloc( \ + HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ + (head)->hh.tbl->signature = HASH_SIGNATURE; \ + if (!(head)->hh.tbl->buckets) \ + { \ + HASH_RECORD_OOM(oomed); \ + 
uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + } \ + else \ + { \ + uthash_bzero((head)->hh.tbl->buckets, \ + HASH_INITIAL_NUM_BUCKETS * \ + sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_MAKE((head)->hh.tbl, oomed); \ + IF_HASH_NONFATAL_OOM(if (oomed) { \ + uthash_free((head)->hh.tbl->buckets, \ + HASH_INITIAL_NUM_BUCKETS * \ + sizeof(struct UT_hash_bucket)); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + }) \ + } \ + } \ + } while (0) + +#define HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, \ + hashval, add, replaced, cmpfcn) \ + do \ + { \ + (replaced) = NULL; \ + HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, \ + hashval, replaced); \ + if (replaced) \ + { \ + HASH_DELETE(hh, head, replaced); \ + } \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), \ + keylen_in, hashval, add, cmpfcn); \ + } while (0) + +#define HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, hashval, add, \ + replaced) \ + do \ + { \ + (replaced) = NULL; \ + HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, \ + hashval, replaced); \ + if (replaced) \ + { \ + HASH_DELETE(hh, head, replaced); \ + } \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, \ + hashval, add); \ + } while (0) + +#define HASH_REPLACE(hh, head, fieldname, keylen_in, add, replaced) \ + do \ + { \ + unsigned _hr_hashv; \ + HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ + HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hashv, \ + add, replaced); \ + } while (0) + +#define HASH_REPLACE_INORDER(hh, head, fieldname, keylen_in, add, replaced, \ + cmpfcn) \ + do \ + { \ + unsigned _hr_hashv; \ + HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ + HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, \ + _hr_hashv, add, replaced, cmpfcn); \ + } while (0) + +#define HASH_APPEND_LIST(hh, head, add) \ + do \ + { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = 
ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ + (head)->hh.tbl->tail->next = (add); \ + (head)->hh.tbl->tail = &((add)->hh); \ + } while (0) + +#define HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn) \ + do \ + { \ + do \ + { \ + if (cmpfcn(DECLTYPE(head)(_hs_iter), add) > 0) \ + { \ + break; \ + } \ + } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \ + } while (0) + +#ifdef NO_DECLTYPE +#undef HASH_AKBI_INNER_LOOP +#define HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn) \ + do \ + { \ + char *_hs_saved_head = (char *)(head); \ + do \ + { \ + DECLTYPE_ASSIGN(head, _hs_iter); \ + if (cmpfcn(head, add) > 0) \ + { \ + DECLTYPE_ASSIGN(head, _hs_saved_head); \ + break; \ + } \ + DECLTYPE_ASSIGN(head, _hs_saved_head); \ + } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \ + } while (0) +#endif + +#if HASH_NONFATAL_OOM + +#define HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, oomed) \ + do \ + { \ + if (!(oomed)) \ + { \ + unsigned _ha_bkt; \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, \ + oomed); \ + if (oomed) \ + { \ + HASH_ROLLBACK_BKT(hh, head, &(add)->hh); \ + HASH_DELETE_HH(hh, head, &(add)->hh); \ + (add)->hh.tbl = NULL; \ + uthash_nonfatal_oom(add); \ + } \ + else \ + { \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + } \ + } \ + else \ + { \ + (add)->hh.tbl = NULL; \ + uthash_nonfatal_oom(add); \ + } \ + } while (0) + +#else + +#define HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, oomed) \ + do \ + { \ + unsigned _ha_bkt; \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, \ + oomed); \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + } while (0) + +#endif + +#define 
HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, \ + hashval, add, cmpfcn) \ + do \ + { \ + IF_HASH_NONFATAL_OOM(int _ha_oomed = 0;) \ + (add)->hh.hashv = (hashval); \ + (add)->hh.key = (char *)(keyptr); \ + (add)->hh.keylen = (unsigned)(keylen_in); \ + if (!(head)) \ + { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = NULL; \ + HASH_MAKE_TABLE(hh, add, _ha_oomed); \ + IF_HASH_NONFATAL_OOM(if (!_ha_oomed) { ) \ + (head) = (add); \ + IF_HASH_NONFATAL_OOM( \ + }) \ + } \ + else \ + { \ + void *_hs_iter = (head); \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn); \ + if (_hs_iter) \ + { \ + (add)->hh.next = _hs_iter; \ + if (((add)->hh.prev = \ + HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev)) \ + { \ + HH_FROM_ELMT((head)->hh.tbl, (add)->hh.prev)->next = \ + (add); \ + } \ + else \ + { \ + (head) = (add); \ + } \ + HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev = (add); \ + } \ + else \ + { \ + HASH_APPEND_LIST(hh, head, add); \ + } \ + } \ + HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, \ + _ha_oomed); \ + HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE_INORDER"); \ + } while (0) + +#define HASH_ADD_KEYPTR_INORDER(hh, head, keyptr, keylen_in, add, cmpfcn) \ + do \ + { \ + unsigned _hs_hashv; \ + HASH_VALUE(keyptr, keylen_in, _hs_hashv); \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, \ + _hs_hashv, add, cmpfcn); \ + } while (0) + +#define HASH_ADD_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, hashval, \ + add, cmpfcn) \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), \ + keylen_in, hashval, add, cmpfcn) + +#define HASH_ADD_INORDER(hh, head, fieldname, keylen_in, add, cmpfcn) \ + HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add, \ + cmpfcn) + +#define HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, hashval, add) \ + do \ + { \ + IF_HASH_NONFATAL_OOM(int _ha_oomed = 0;) \ + (add)->hh.hashv = (hashval); \ + (add)->hh.key = (const 
void *)(keyptr); \ + (add)->hh.keylen = (unsigned)(keylen_in); \ + if (!(head)) \ + { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = NULL; \ + HASH_MAKE_TABLE(hh, add, _ha_oomed); \ + IF_HASH_NONFATAL_OOM(if (!_ha_oomed) { ) \ + (head) = (add); \ + IF_HASH_NONFATAL_OOM( \ + }) \ + } \ + else \ + { \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_APPEND_LIST(hh, head, add); \ + } \ + HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, \ + _ha_oomed); \ + HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE"); \ + } while (0) + +#define HASH_ADD_KEYPTR(hh, head, keyptr, keylen_in, add) \ + do \ + { \ + unsigned _ha_hashv; \ + HASH_VALUE(keyptr, keylen_in, _ha_hashv); \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hashv, \ + add); \ + } while (0) + +#define HASH_ADD_BYHASHVALUE(hh, head, fieldname, keylen_in, hashval, add) \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, \ + hashval, add) + +#define HASH_ADD(hh, head, fieldname, keylen_in, add) \ + HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add) + +#define HASH_TO_BKT(hashv, num_bkts, bkt) \ + do \ + { \ + bkt = ((hashv) & ((num_bkts) - 1U)); \ + } while (0) + +/* delete "delptr" from the hash table. + * "the usual" patch-up process for the app-order doubly-linked-list. + * The use of _hd_hh_del below deserves special explanation. + * These used to be expressed using (delptr) but that led to a bug + * if someone used the same symbol for the head and deletee, like + * HASH_DELETE(hh,users,users); + * We want that to work, but by changing the head (users) below + * we were forfeiting our ability to further refer to the deletee (users) + * in the patch-up process. Solution: use scratch space to + * copy the deletee pointer, then the latter references are via that + * scratch pointer rather than through the repointed (users) symbol. 
+ */ +#define HASH_DELETE(hh, head, delptr) HASH_DELETE_HH(hh, head, &(delptr)->hh) + +#define HASH_DELETE_HH(hh, head, delptrhh) \ + do \ + { \ + const struct UT_hash_handle *_hd_hh_del = (delptrhh); \ + if ((_hd_hh_del->prev == NULL) && (_hd_hh_del->next == NULL)) \ + { \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets * \ + sizeof(struct UT_hash_bucket)); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head) = NULL; \ + } \ + else \ + { \ + unsigned _hd_bkt; \ + if (_hd_hh_del == (head)->hh.tbl->tail) \ + { \ + (head)->hh.tbl->tail = \ + HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev); \ + } \ + if (_hd_hh_del->prev != NULL) \ + { \ + HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev)->next = \ + _hd_hh_del->next; \ + } \ + else \ + { \ + DECLTYPE_ASSIGN(head, _hd_hh_del->next); \ + } \ + if (_hd_hh_del->next != NULL) \ + { \ + HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->next)->prev = \ + _hd_hh_del->prev; \ + } \ + HASH_TO_BKT(_hd_hh_del->hashv, (head)->hh.tbl->num_buckets, \ + _hd_bkt); \ + HASH_DEL_IN_BKT((head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ + (head)->hh.tbl->num_items--; \ + } \ + HASH_FSCK(hh, head, "HASH_DELETE_HH"); \ + } while (0) + +/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ +#define HASH_FIND_STR(head, findstr, out) \ + do \ + { \ + unsigned _uthash_hfstr_keylen = (unsigned)uthash_strlen(findstr); \ + HASH_FIND(hh, head, findstr, _uthash_hfstr_keylen, out); \ + } while (0) +#define HASH_ADD_STR(head, strfield, add) \ + do \ + { \ + unsigned _uthash_hastr_keylen = \ + (unsigned)uthash_strlen((add)->strfield); \ + HASH_ADD(hh, head, strfield[0], _uthash_hastr_keylen, add); \ + } while (0) +#define HASH_REPLACE_STR(head, strfield, add, replaced) \ + do \ + { \ + unsigned _uthash_hrstr_keylen = \ + (unsigned)uthash_strlen((add)->strfield); \ + HASH_REPLACE(hh, head, strfield[0], _uthash_hrstr_keylen, add, \ + replaced); \ + } while (0) +#define HASH_FIND_INT(head, 
findint, out) \ + HASH_FIND(hh, head, findint, sizeof(int), out) +#define HASH_ADD_INT(head, intfield, add) \ + HASH_ADD(hh, head, intfield, sizeof(int), add) +#define HASH_REPLACE_INT(head, intfield, add, replaced) \ + HASH_REPLACE(hh, head, intfield, sizeof(int), add, replaced) +#define HASH_FIND_PTR(head, findptr, out) \ + HASH_FIND(hh, head, findptr, sizeof(void *), out) +#define HASH_ADD_PTR(head, ptrfield, add) \ + HASH_ADD(hh, head, ptrfield, sizeof(void *), add) +#define HASH_REPLACE_PTR(head, ptrfield, add, replaced) \ + HASH_REPLACE(hh, head, ptrfield, sizeof(void *), add, replaced) +#define HASH_DEL(head, delptr) HASH_DELETE(hh, head, delptr) + +/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is + * defined. This is for uthash developer only; it compiles away if HASH_DEBUG + * isn't defined. + */ +#ifdef HASH_DEBUG +#include <stdio.h> /* fprintf, stderr */ +#define HASH_OOPS(...) \ + do \ + { \ + fprintf(stderr, __VA_ARGS__); \ + exit(-1); \ + } while (0) +#define HASH_FSCK(hh, head, where) \ + do \ + { \ + struct UT_hash_handle *_thh; \ + if (head) \ + { \ + unsigned _bkt_i; \ + unsigned _count = 0; \ + char *_prev; \ + for (_bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; ++_bkt_i) \ + { \ + unsigned _bkt_count = 0; \ + _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ + _prev = NULL; \ + while (_thh) \ + { \ + if (_prev != (char *)(_thh->hh_prev)) \ + { \ + HASH_OOPS("%s: invalid hh_prev %p, actual %p\n", \ + (where), (void *)_thh->hh_prev, \ + (void *)_prev); \ + } \ + _bkt_count++; \ + _prev = (char *)(_thh); \ + _thh = _thh->hh_next; \ + } \ + _count += _bkt_count; \ + if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) \ + { \ + HASH_OOPS("%s: invalid bucket count %u, actual %u\n", \ + (where), (head)->hh.tbl->buckets[_bkt_i].count, \ + _bkt_count); \ + } \ + } \ + if (_count != (head)->hh.tbl->num_items) \ + { \ + HASH_OOPS("%s: invalid hh item count %u, actual %u\n", \ + (where), (head)->hh.tbl->num_items, _count); \ + } \ + 
_count = 0; \ + _prev = NULL; \ + _thh = &(head)->hh; \ + while (_thh) \ + { \ + _count++; \ + if (_prev != (char *)_thh->prev) \ + { \ + HASH_OOPS("%s: invalid prev %p, actual %p\n", (where), \ + (void *)_thh->prev, (void *)_prev); \ + } \ + _prev = (char *)ELMT_FROM_HH((head)->hh.tbl, _thh); \ + _thh = (_thh->next ? HH_FROM_ELMT((head)->hh.tbl, _thh->next) \ + : NULL); \ + } \ + if (_count != (head)->hh.tbl->num_items) \ + { \ + HASH_OOPS("%s: invalid app item count %u, actual %u\n", \ + (where), (head)->hh.tbl->num_items, _count); \ + } \ + } \ + } while (0) +#else +#define HASH_FSCK(hh, head, where) +#endif + +/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to + * the descriptor to which this macro is defined for tuning the hash function. + * The app can #include <unistd.h> to get the prototype for write(2). */ +#ifdef HASH_EMIT_KEYS +#define HASH_EMIT_KEY(hh, head, keyptr, fieldlen) \ + do \ + { \ + unsigned _klen = fieldlen; \ + write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ + write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \ + } while (0) +#else +#define HASH_EMIT_KEY(hh, head, keyptr, fieldlen) +#endif + +/* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. 
+ */ +#define HASH_BER(key, keylen, hashv) \ + do \ + { \ + unsigned _hb_keylen = (unsigned)keylen; \ + const unsigned char *_hb_key = (const unsigned char *)(key); \ + (hashv) = 0; \ + while (_hb_keylen-- != 0U) \ + { \ + (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \ + } \ + } while (0) + +/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at + * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx + * (archive link: https://archive.is/Ivcan ) + */ +#define HASH_SAX(key, keylen, hashv) \ + do \ + { \ + unsigned _sx_i; \ + const unsigned char *_hs_key = (const unsigned char *)(key); \ + hashv = 0; \ + for (_sx_i = 0; _sx_i < keylen; _sx_i++) \ + { \ + hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ + } \ + } while (0) +/* FNV-1a variation */ +#define HASH_FNV(key, keylen, hashv) \ + do \ + { \ + unsigned _fn_i; \ + const unsigned char *_hf_key = (const unsigned char *)(key); \ + (hashv) = 2166136261U; \ + for (_fn_i = 0; _fn_i < keylen; _fn_i++) \ + { \ + hashv = hashv ^ _hf_key[_fn_i]; \ + hashv = hashv * 16777619U; \ + } \ + } while (0) + +#define HASH_OAT(key, keylen, hashv) \ + do \ + { \ + unsigned _ho_i; \ + const unsigned char *_ho_key = (const unsigned char *)(key); \ + hashv = 0; \ + for (_ho_i = 0; _ho_i < keylen; _ho_i++) \ + { \ + hashv += _ho_key[_ho_i]; \ + hashv += (hashv << 10); \ + hashv ^= (hashv >> 6); \ + } \ + hashv += (hashv << 3); \ + hashv ^= (hashv >> 11); \ + hashv += (hashv << 15); \ + } while (0) + +#define HASH_JEN_MIX(a, b, c) \ + do \ + { \ + a -= b; \ + a -= c; \ + a ^= (c >> 13); \ + b -= c; \ + b -= a; \ + b ^= (a << 8); \ + c -= a; \ + c -= b; \ + c ^= (b >> 13); \ + a -= b; \ + a -= c; \ + a ^= (c >> 12); \ + b -= c; \ + b -= a; \ + b ^= (a << 16); \ + c -= a; \ + c -= b; \ + c ^= (b >> 5); \ + a -= b; \ + a -= c; \ + a ^= (c >> 3); \ + b -= c; \ + b -= a; \ + b ^= (a << 10); \ + c -= a; \ + c -= b; \ + c ^= (b >> 15); \ + } while (0) + +#define HASH_JEN(key, keylen, hashv) \ + do 
\ + { \ + unsigned _hj_i, _hj_j, _hj_k; \ + unsigned const char *_hj_key = (unsigned const char *)(key); \ + hashv = 0xfeedbeefu; \ + _hj_i = _hj_j = 0x9e3779b9u; \ + _hj_k = (unsigned)(keylen); \ + while (_hj_k >= 12U) \ + { \ + _hj_i += \ + (_hj_key[0] + ((unsigned)_hj_key[1] << 8) + \ + ((unsigned)_hj_key[2] << 16) + ((unsigned)_hj_key[3] << 24)); \ + _hj_j += \ + (_hj_key[4] + ((unsigned)_hj_key[5] << 8) + \ + ((unsigned)_hj_key[6] << 16) + ((unsigned)_hj_key[7] << 24)); \ + hashv += (_hj_key[8] + ((unsigned)_hj_key[9] << 8) + \ + ((unsigned)_hj_key[10] << 16) + \ + ((unsigned)_hj_key[11] << 24)); \ + \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + \ + _hj_key += 12; \ + _hj_k -= 12U; \ + } \ + hashv += (unsigned)(keylen); \ + switch (_hj_k) \ + { \ + case 11: \ + hashv += ((unsigned)_hj_key[10] << 24); /* FALLTHROUGH */ \ + case 10: \ + hashv += ((unsigned)_hj_key[9] << 16); /* FALLTHROUGH */ \ + case 9: \ + hashv += ((unsigned)_hj_key[8] << 8); /* FALLTHROUGH */ \ + case 8: \ + _hj_j += ((unsigned)_hj_key[7] << 24); /* FALLTHROUGH */ \ + case 7: \ + _hj_j += ((unsigned)_hj_key[6] << 16); /* FALLTHROUGH */ \ + case 6: \ + _hj_j += ((unsigned)_hj_key[5] << 8); /* FALLTHROUGH */ \ + case 5: \ + _hj_j += _hj_key[4]; /* FALLTHROUGH */ \ + case 4: \ + _hj_i += ((unsigned)_hj_key[3] << 24); /* FALLTHROUGH */ \ + case 3: \ + _hj_i += ((unsigned)_hj_key[2] << 16); /* FALLTHROUGH */ \ + case 2: \ + _hj_i += ((unsigned)_hj_key[1] << 8); /* FALLTHROUGH */ \ + case 1: \ + _hj_i += _hj_key[0]; /* FALLTHROUGH */ \ + default:; \ + } \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + } while (0) + +/* The Paul Hsieh hash function */ +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) || \ + defined(_MSC_VER) || defined(__BORLANDC__) || defined(__TURBOC__) +#define get16bits(d) (*((const uint16_t *)(d))) +#endif + +#if !defined(get16bits) +#define get16bits(d) \ + ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) + \ + (uint32_t)(((const uint8_t 
*)(d))[0])) +#endif +#define HASH_SFH(key, keylen, hashv) \ + do \ + { \ + unsigned const char *_sfh_key = (unsigned const char *)(key); \ + uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen; \ + \ + unsigned _sfh_rem = _sfh_len & 3U; \ + _sfh_len >>= 2; \ + hashv = 0xcafebabeu; \ + \ + /* Main loop */ \ + for (; _sfh_len > 0U; _sfh_len--) \ + { \ + hashv += get16bits(_sfh_key); \ + _sfh_tmp = ((uint32_t)(get16bits(_sfh_key + 2)) << 11) ^ hashv; \ + hashv = (hashv << 16) ^ _sfh_tmp; \ + _sfh_key += 2U * sizeof(uint16_t); \ + hashv += hashv >> 11; \ + } \ + \ + /* Handle end cases */ \ + switch (_sfh_rem) \ + { \ + case 3: \ + hashv += get16bits(_sfh_key); \ + hashv ^= hashv << 16; \ + hashv ^= (uint32_t)(_sfh_key[sizeof(uint16_t)]) << 18; \ + hashv += hashv >> 11; \ + break; \ + case 2: \ + hashv += get16bits(_sfh_key); \ + hashv ^= hashv << 11; \ + hashv += hashv >> 17; \ + break; \ + case 1: \ + hashv += *_sfh_key; \ + hashv ^= hashv << 10; \ + hashv += hashv >> 1; \ + break; \ + default:; \ + } \ + \ + /* Force "avalanching" of final 127 bits */ \ + hashv ^= hashv << 3; \ + hashv += hashv >> 5; \ + hashv ^= hashv << 4; \ + hashv += hashv >> 17; \ + hashv ^= hashv << 25; \ + hashv += hashv >> 6; \ + } while (0) + +/* iterate over items in a known bucket to find desired item */ +#define HASH_FIND_IN_BKT(tbl, hh, head, keyptr, keylen_in, hashval, out) \ + do \ + { \ + if ((head).hh_head != NULL) \ + { \ + DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head)); \ + } \ + else \ + { \ + (out) = NULL; \ + } \ + while ((out) != NULL) \ + { \ + if ((out)->hh.hashv == (hashval) && \ + (out)->hh.keylen == (keylen_in)) \ + { \ + if (HASH_KEYCMP((out)->hh.key, keyptr, keylen_in) == 0) \ + { \ + break; \ + } \ + } \ + if ((out)->hh.hh_next != NULL) \ + { \ + DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next)); \ + } \ + else \ + { \ + (out) = NULL; \ + } \ + } \ + } while (0) + +/* add an item to a bucket */ +#define HASH_ADD_TO_BKT(head, hh, addhh, oomed) \ + do \ + { 
\ + UT_hash_bucket *_ha_head = &(head); \ + _ha_head->count++; \ + (addhh)->hh_next = _ha_head->hh_head; \ + (addhh)->hh_prev = NULL; \ + if (_ha_head->hh_head != NULL) \ + { \ + _ha_head->hh_head->hh_prev = (addhh); \ + } \ + _ha_head->hh_head = (addhh); \ + if ((_ha_head->count >= \ + ((_ha_head->expand_mult + 1U) * HASH_BKT_CAPACITY_THRESH)) && \ + !(addhh)->tbl->noexpand) \ + { \ + HASH_EXPAND_BUCKETS(addhh, (addhh)->tbl, oomed); \ + IF_HASH_NONFATAL_OOM(if (oomed) { HASH_DEL_IN_BKT(head, addhh); }) \ + } \ + } while (0) + +/* remove an item from a given bucket */ +#define HASH_DEL_IN_BKT(head, delhh) \ + do \ + { \ + UT_hash_bucket *_hd_head = &(head); \ + _hd_head->count--; \ + if (_hd_head->hh_head == (delhh)) \ + { \ + _hd_head->hh_head = (delhh)->hh_next; \ + } \ + if ((delhh)->hh_prev) \ + { \ + (delhh)->hh_prev->hh_next = (delhh)->hh_next; \ + } \ + if ((delhh)->hh_next) \ + { \ + (delhh)->hh_next->hh_prev = (delhh)->hh_prev; \ + } \ + } while (0) + +/* Bucket expansion has the effect of doubling the number of buckets + * and redistributing the items into the new buckets. Ideally the + * items will distribute more or less evenly into the new buckets + * (the extent to which this is true is a measure of the quality of + * the hash function as it applies to the key domain). + * + * With the items distributed into more buckets, the chain length + * (item count) in each bucket is reduced. Thus by expanding buckets + * the hash keeps a bound on the chain length. This bounded chain + * length is the essence of how a hash provides constant time lookup. + * + * The calculation of tbl->ideal_chain_maxlen below deserves some + * explanation. First, keep in mind that we're calculating the ideal + * maximum chain length based on the *new* (doubled) bucket count. + * In fractions this is just n/b (n=number of items,b=new num buckets). + * Since the ideal chain length is an integer, we want to calculate + * ceil(n/b). 
We don't depend on floating point arithmetic in this + * hash, so to calculate ceil(n/b) with integers we could write + * + * ceil(n/b) = (n/b) + ((n%b)?1:0) + * + * and in fact a previous version of this hash did just that. + * But now we have improved things a bit by recognizing that b is + * always a power of two. We keep its base 2 log handy (call it lb), + * so now we can write this with a bit shift and logical AND: + * + * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) + * + */ +#define HASH_EXPAND_BUCKETS(hh, tbl, oomed) \ + do \ + { \ + unsigned _he_bkt; \ + unsigned _he_bkt_i; \ + struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ + UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ + _he_new_buckets = (UT_hash_bucket *)uthash_malloc( \ + sizeof(struct UT_hash_bucket) * (tbl)->num_buckets * 2U); \ + if (!_he_new_buckets) \ + { \ + HASH_RECORD_OOM(oomed); \ + } \ + else \ + { \ + uthash_bzero(_he_new_buckets, sizeof(struct UT_hash_bucket) * \ + (tbl)->num_buckets * 2U); \ + (tbl)->ideal_chain_maxlen = \ + ((tbl)->num_items >> ((tbl)->log2_num_buckets + 1U)) + \ + ((((tbl)->num_items & (((tbl)->num_buckets * 2U) - 1U)) != 0U) \ + ? 
1U \ + : 0U); \ + (tbl)->nonideal_items = 0; \ + for (_he_bkt_i = 0; _he_bkt_i < (tbl)->num_buckets; _he_bkt_i++) \ + { \ + _he_thh = (tbl)->buckets[_he_bkt_i].hh_head; \ + while (_he_thh != NULL) \ + { \ + _he_hh_nxt = _he_thh->hh_next; \ + HASH_TO_BKT(_he_thh->hashv, (tbl)->num_buckets * 2U, \ + _he_bkt); \ + _he_newbkt = &(_he_new_buckets[_he_bkt]); \ + if (++(_he_newbkt->count) > (tbl)->ideal_chain_maxlen) \ + { \ + (tbl)->nonideal_items++; \ + if (_he_newbkt->count > _he_newbkt->expand_mult * \ + (tbl)->ideal_chain_maxlen) \ + { \ + _he_newbkt->expand_mult++; \ + } \ + } \ + _he_thh->hh_prev = NULL; \ + _he_thh->hh_next = _he_newbkt->hh_head; \ + if (_he_newbkt->hh_head != NULL) \ + { \ + _he_newbkt->hh_head->hh_prev = _he_thh; \ + } \ + _he_newbkt->hh_head = _he_thh; \ + _he_thh = _he_hh_nxt; \ + } \ + } \ + uthash_free((tbl)->buckets, \ + (tbl)->num_buckets * sizeof(struct UT_hash_bucket)); \ + (tbl)->num_buckets *= 2U; \ + (tbl)->log2_num_buckets++; \ + (tbl)->buckets = _he_new_buckets; \ + (tbl)->ineff_expands = \ + ((tbl)->nonideal_items > ((tbl)->num_items >> 1)) \ + ? ((tbl)->ineff_expands + 1U) \ + : 0U; \ + if ((tbl)->ineff_expands > 1U) \ + { \ + (tbl)->noexpand = 1; \ + uthash_noexpand_fyi(tbl); \ + } \ + uthash_expand_fyi(tbl); \ + } \ + } while (0) + +/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ +/* Note that HASH_SORT assumes the hash handle name to be hh. + * HASH_SRT was added to allow the hash handle name to be passed in. 
*/ +#define HASH_SORT(head, cmpfcn) HASH_SRT(hh, head, cmpfcn) +#define HASH_SRT(hh, head, cmpfcn) \ + do \ + { \ + unsigned _hs_i; \ + unsigned _hs_looping, _hs_nmerges, _hs_insize, _hs_psize, _hs_qsize; \ + struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ + if (head != NULL) \ + { \ + _hs_insize = 1; \ + _hs_looping = 1; \ + _hs_list = &((head)->hh); \ + while (_hs_looping != 0U) \ + { \ + _hs_p = _hs_list; \ + _hs_list = NULL; \ + _hs_tail = NULL; \ + _hs_nmerges = 0; \ + while (_hs_p != NULL) \ + { \ + _hs_nmerges++; \ + _hs_q = _hs_p; \ + _hs_psize = 0; \ + for (_hs_i = 0; _hs_i < _hs_insize; ++_hs_i) \ + { \ + _hs_psize++; \ + _hs_q = \ + ((_hs_q->next != NULL) \ + ? HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) \ + : NULL); \ + if (_hs_q == NULL) \ + { \ + break; \ + } \ + } \ + _hs_qsize = _hs_insize; \ + while ((_hs_psize != 0U) || \ + ((_hs_qsize != 0U) && (_hs_q != NULL))) \ + { \ + if (_hs_psize == 0U) \ + { \ + _hs_e = _hs_q; \ + _hs_q = ((_hs_q->next != NULL) \ + ? HH_FROM_ELMT((head)->hh.tbl, \ + _hs_q->next) \ + : NULL); \ + _hs_qsize--; \ + } \ + else if ((_hs_qsize == 0U) || (_hs_q == NULL)) \ + { \ + _hs_e = _hs_p; \ + if (_hs_p != NULL) \ + { \ + _hs_p = ((_hs_p->next != NULL) \ + ? HH_FROM_ELMT((head)->hh.tbl, \ + _hs_p->next) \ + : NULL); \ + } \ + _hs_psize--; \ + } \ + else if ((cmpfcn(DECLTYPE(head)(ELMT_FROM_HH( \ + (head)->hh.tbl, _hs_p)), \ + DECLTYPE(head)(ELMT_FROM_HH( \ + (head)->hh.tbl, _hs_q)))) <= 0) \ + { \ + _hs_e = _hs_p; \ + if (_hs_p != NULL) \ + { \ + _hs_p = ((_hs_p->next != NULL) \ + ? HH_FROM_ELMT((head)->hh.tbl, \ + _hs_p->next) \ + : NULL); \ + } \ + _hs_psize--; \ + } \ + else \ + { \ + _hs_e = _hs_q; \ + _hs_q = ((_hs_q->next != NULL) \ + ? HH_FROM_ELMT((head)->hh.tbl, \ + _hs_q->next) \ + : NULL); \ + _hs_qsize--; \ + } \ + if (_hs_tail != NULL) \ + { \ + _hs_tail->next = \ + ((_hs_e != NULL) \ + ? 
ELMT_FROM_HH((head)->hh.tbl, _hs_e) \ + : NULL); \ + } \ + else \ + { \ + _hs_list = _hs_e; \ + } \ + if (_hs_e != NULL) \ + { \ + _hs_e->prev = \ + ((_hs_tail != NULL) \ + ? ELMT_FROM_HH((head)->hh.tbl, _hs_tail) \ + : NULL); \ + } \ + _hs_tail = _hs_e; \ + } \ + _hs_p = _hs_q; \ + } \ + if (_hs_tail != NULL) \ + { \ + _hs_tail->next = NULL; \ + } \ + if (_hs_nmerges <= 1U) \ + { \ + _hs_looping = 0; \ + (head)->hh.tbl->tail = _hs_tail; \ + DECLTYPE_ASSIGN(head, \ + ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ + } \ + _hs_insize *= 2U; \ + } \ + HASH_FSCK(hh, head, "HASH_SRT"); \ + } \ + } while (0) + +/* This function selects items from one hash into another hash. + * The end result is that the selected items have dual presence + * in both hashes. There is no copy of the items made; rather + * they are added into the new hash through a secondary + * hash handle that must be present in the structure. */ +#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ + do \ + { \ + unsigned _src_bkt, _dst_bkt; \ + void *_last_elt = NULL, *_elt; \ + UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh = NULL; \ + ptrdiff_t _dst_hho = ((char *)(&(dst)->hh_dst) - (char *)(dst)); \ + if ((src) != NULL) \ + { \ + for (_src_bkt = 0; _src_bkt < (src)->hh_src.tbl->num_buckets; \ + _src_bkt++) \ + { \ + for (_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ + _src_hh != NULL; _src_hh = _src_hh->hh_next) \ + { \ + _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ + if (cond(_elt)) \ + { \ + IF_HASH_NONFATAL_OOM(int _hs_oomed = 0;) \ + _dst_hh = (UT_hash_handle *)(void *)(((char *)_elt) + \ + _dst_hho); \ + _dst_hh->key = _src_hh->key; \ + _dst_hh->keylen = _src_hh->keylen; \ + _dst_hh->hashv = _src_hh->hashv; \ + _dst_hh->prev = _last_elt; \ + _dst_hh->next = NULL; \ + if (_last_elt_hh != NULL) \ + { \ + _last_elt_hh->next = _elt; \ + } \ + if ((dst) == NULL) \ + { \ + DECLTYPE_ASSIGN(dst, _elt); \ + HASH_MAKE_TABLE(hh_dst, dst, _hs_oomed); \ + IF_HASH_NONFATAL_OOM(if
(_hs_oomed) { \ + uthash_nonfatal_oom(_elt); \ + (dst) = NULL; \ + continue; \ + }) \ + } \ + else \ + { \ + _dst_hh->tbl = (dst)->hh_dst.tbl; \ + } \ + HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, \ + _dst_bkt); \ + HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt], \ + hh_dst, _dst_hh, _hs_oomed); \ + (dst)->hh_dst.tbl->num_items++; \ + IF_HASH_NONFATAL_OOM(if (_hs_oomed) { \ + HASH_ROLLBACK_BKT(hh_dst, dst, _dst_hh); \ + HASH_DELETE_HH(hh_dst, dst, _dst_hh); \ + _dst_hh->tbl = NULL; \ + uthash_nonfatal_oom(_elt); \ + continue; \ + }) \ + HASH_BLOOM_ADD(_dst_hh->tbl, _dst_hh->hashv); \ + _last_elt = _elt; \ + _last_elt_hh = _dst_hh; \ + } \ + } \ + } \ + } \ + HASH_FSCK(hh_dst, dst, "HASH_SELECT"); \ + } while (0) + +#define HASH_CLEAR(hh, head) \ + do \ + { \ + if ((head) != NULL) \ + { \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets * \ + sizeof(struct UT_hash_bucket)); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head) = NULL; \ + } \ + } while (0) + +#define HASH_OVERHEAD(hh, head) \ + (((head) != NULL) \ + ? ((size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ + ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ + sizeof(UT_hash_table) + (HASH_BLOOM_BYTELEN))) \ + : 0U) + +#ifdef NO_DECLTYPE +#define HASH_ITER(hh, head, el, tmp) \ + for (((el) = (head)), \ + ((*(char **)(&(tmp))) = \ + (char *)((head != NULL) ? (head)->hh.next : NULL)); \ + (el) != NULL; ((el) = (tmp)), \ + ((*(char **)(&(tmp))) = \ + (char *)((tmp != NULL) ? (tmp)->hh.next : NULL))) +#else +#define HASH_ITER(hh, head, el, tmp) \ + for (((el) = (head)), \ + ((tmp) = DECLTYPE(el)((head != NULL) ? (head)->hh.next : NULL)); \ + (el) != NULL; \ + ((el) = (tmp)), \ + ((tmp) = DECLTYPE(el)((tmp != NULL) ? (tmp)->hh.next : NULL))) +#endif + +/* obtain a count of items in the hash */ +#define HASH_COUNT(head) HASH_CNT(hh, head) +#define HASH_CNT(hh, head) ((head != NULL) ? 
((head)->hh.tbl->num_items) : 0U) + +typedef struct UT_hash_bucket +{ + struct UT_hash_handle *hh_head; + unsigned count; + + /* expand_mult is normally set to 0. In this situation, the max chain length + * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If + * the bucket's chain exceeds this length, bucket expansion is triggered). + * However, setting expand_mult to a non-zero value delays bucket expansion + * (that would be triggered by additions to this particular bucket) + * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. + * (The multiplier is simply expand_mult+1). The whole idea of this + * multiplier is to reduce bucket expansions, since they are expensive, in + * situations where we know that a particular bucket tends to be overused. + * It is better to let its chain length grow to a longer yet-still-bounded + * value, than to do an O(n) bucket expansion too often. + */ + unsigned expand_mult; + +} UT_hash_bucket; + +/* random signature used only to find hash tables in external analysis */ +#define HASH_SIGNATURE 0xa0111fe1u +#define HASH_BLOOM_SIGNATURE 0xb12220f2u + +typedef struct UT_hash_table +{ + UT_hash_bucket *buckets; + unsigned num_buckets, log2_num_buckets; + unsigned num_items; + struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ + ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */ + + /* in an ideal situation (all buckets used equally), no bucket would have + * more than ceil(#items/#buckets) items. that's the ideal chain length. */ + unsigned ideal_chain_maxlen; + + /* nonideal_items is the number of items in the hash whose chain position + * exceeds the ideal chain maxlen.
these items pay the penalty for an uneven + * hash distribution; reaching them in a chain traversal takes >ideal steps + */ + unsigned nonideal_items; + + /* ineffective expands occur when a bucket doubling was performed, but + * afterward, more than half the items in the hash had nonideal chain + * positions. If this happens on two consecutive expansions we inhibit any + * further expansion, as it's not helping; this happens when the hash + * function isn't a good fit for the key domain. When expansion is inhibited + * the hash will still work, albeit no longer in constant time. */ + unsigned ineff_expands, noexpand; + + uint32_t signature; /* used only to find hash tables in external analysis */ +#ifdef HASH_BLOOM + uint32_t + bloom_sig; /* used only to test bloom exists in external analysis */ + uint8_t *bloom_bv; + uint8_t bloom_nbits; +#endif + +} UT_hash_table; + +typedef struct UT_hash_handle +{ + struct UT_hash_table *tbl; + void *prev; /* prev element in app order */ + void *next; /* next element in app order */ + struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ + struct UT_hash_handle *hh_next; /* next hh in bucket order */ + const void *key; /* ptr to enclosing struct's key */ + unsigned keylen; /* enclosing struct's key len */ + unsigned hashv; /* result of hash-fcn(key) */ +} UT_hash_handle; + +#endif /* UTHASH_H */ \ No newline at end of file diff --git a/systrace/thirdparty/x86_64/libunwind/libunwind-common.h b/systrace/thirdparty/x86_64/libunwind/libunwind-common.h new file mode 100644 index 0000000000000000000000000000000000000000..9c0db22b11df3075b718e08ceb5c89f6d4df57b0 --- /dev/null +++ b/systrace/thirdparty/x86_64/libunwind/libunwind-common.h @@ -0,0 +1,335 @@ +/* libunwind - a platform-independent unwind library + Copyright (C) 2001-2004 Hewlett-Packard Co + Contributed by David Mosberger-Tang + +This file is part of libunwind. 
+ +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#define UNW_VERSION_MAJOR 1 +#define UNW_VERSION_MINOR 9 +#define UNW_VERSION_EXTRA -pre + +#define UNW_VERSION_CODE(maj, min) (((maj) << 16) | (min)) +#define UNW_VERSION UNW_VERSION_CODE(UNW_VERSION_MAJOR, UNW_VERSION_MINOR) + +#ifdef __sun +// On SmartOS, gcc fails with the following error: +// +// ../include/libunwind-common.h:43:41: error: expected identifier or '(' before +// numeric constant # define UNW_PREFIX UNW_PASTE(UNW_PASTE(_U,UNW_TARGET),_) +// ^ +// +// workaround is to undefine _U explicitly. +// see https://github.com/libunwind/libunwind/issues/118 for more details. 
+// +#undef _U +#endif + +#define UNW_PASTE2(x, y) x##y +#define UNW_PASTE(x, y) UNW_PASTE2(x, y) +#define UNW_OBJ(fn) UNW_PASTE(UNW_PREFIX, fn) +#define UNW_ARCH_OBJ(fn) UNW_PASTE(UNW_PASTE(UNW_PASTE(_U, UNW_TARGET), _), fn) + +#ifdef UNW_LOCAL_ONLY +#define UNW_PREFIX UNW_PASTE(UNW_PASTE(_UL, UNW_TARGET), _) +#else /* !UNW_LOCAL_ONLY */ +#define UNW_PREFIX UNW_PASTE(UNW_PASTE(_U, UNW_TARGET), _) +#endif /* !UNW_LOCAL_ONLY */ + +/* Error codes. The unwind routines return the *negated* values of + these error codes on error and a non-negative value on success. */ +typedef enum +{ + UNW_ESUCCESS = 0, /* no error */ + UNW_EUNSPEC, /* unspecified (general) error */ + UNW_ENOMEM, /* out of memory */ + UNW_EBADREG, /* bad register number */ + UNW_EREADONLYREG, /* attempt to write read-only register */ + UNW_ESTOPUNWIND, /* stop unwinding */ + UNW_EINVALIDIP, /* invalid IP */ + UNW_EBADFRAME, /* bad frame */ + UNW_EINVAL, /* unsupported operation or bad value */ + UNW_EBADVERSION, /* unwind info has unsupported version */ + UNW_ENOINFO /* no unwind info found */ +} unw_error_t; + +/* The following enum defines the indices for a couple of + (pseudo-)registers which have the same meaning across all + platforms. (RO) means read-only. (RW) means read-write. General + registers (aka "integer registers") are expected to start with + index 0. The number of such registers is architecture-dependent. + The remaining indices can be used as an architecture sees fit. The + last valid register index is given by UNW_REG_LAST. 
*/ +typedef enum +{ + UNW_REG_IP = UNW_TDEP_IP, /* (rw) instruction pointer (pc) */ + UNW_REG_SP = UNW_TDEP_SP, /* (ro) stack pointer */ + UNW_REG_EH = UNW_TDEP_EH, /* (rw) exception-handling reg base */ + UNW_REG_LAST = UNW_TDEP_LAST_REG +} unw_frame_regnum_t; + +/* Number of exception-handler argument registers: */ +#define UNW_NUM_EH_REGS UNW_TDEP_NUM_EH_REGS + +typedef enum +{ + UNW_CACHE_NONE, /* no caching */ + UNW_CACHE_GLOBAL, /* shared global cache */ + UNW_CACHE_PER_THREAD /* per-thread caching */ +} unw_caching_policy_t; + +typedef enum +{ + UNW_INIT_SIGNAL_FRAME = 1 /* We know this is a signal frame */ +} unw_init_local2_flags_t; + +typedef int unw_regnum_t; + +/* The unwind cursor starts at the youngest (most deeply nested) frame + and is used to track the frame state as the unwinder steps from + frame to frame. It is safe to make (shallow) copies of variables + of this type. */ +typedef struct unw_cursor +{ + unw_word_t opaque[UNW_TDEP_CURSOR_LEN]; +} unw_cursor_t; + +/* This type encapsulates the entire (preserved) machine-state. */ +typedef unw_tdep_context_t unw_context_t; + +/* unw_getcontext() fills the unw_context_t pointed to by UC with the + machine state as it exists at the call-site. For implementation + reasons, this needs to be a target-dependent macro. It's easiest + to think of unw_getcontext() as being identical to getcontext(). */ +#define unw_getcontext(uc) unw_tdep_getcontext(uc) + +/* Return 1 if register number R is a floating-point register, zero + otherwise. + This routine is signal-safe. */ +#define unw_is_fpreg(r) unw_tdep_is_fpreg(r) + +typedef unw_tdep_fpreg_t unw_fpreg_t; + +typedef struct unw_addr_space *unw_addr_space_t; + +/* Each target may define its own set of flags, but bits 0-15 are + reserved for general libunwind-use. */ +#define UNW_PI_FLAG_FIRST_TDEP_BIT 16 +/* The information comes from a .debug_frame section.
*/ +#define UNW_PI_FLAG_DEBUG_FRAME 32 + +typedef struct unw_proc_info +{ + unw_word_t start_ip; /* first IP covered by this procedure */ + unw_word_t end_ip; /* first IP NOT covered by this procedure */ +#if defined(NEED_LAST_IP) + unw_word_t last_ip; /* first IP that could begin another procedure */ +#endif + unw_word_t lsda; /* address of lang.-spec. data area (if any) */ + unw_word_t handler; /* optional personality routine */ + unw_word_t gp; /* global-pointer value for this procedure */ + unw_word_t flags; /* misc. flags */ + + int format; /* unwind-info format (arch-specific) */ + int unwind_info_size; /* size of the information (if applicable) */ + void *unwind_info; /* unwind-info (arch-specific) */ + unw_tdep_proc_info_t extra; /* target-dependent auxiliary proc-info */ +} unw_proc_info_t; + +typedef int (*unw_reg_states_callback)(void *token, void *reg_states_data, + size_t reg_states_data_size, + unw_word_t start_ip, unw_word_t end_ip); + +/* These are backend callback routines that provide access to the + state of a "remote" process. This can be used, for example, to + unwind another process through the ptrace() interface. */ +typedef struct unw_accessors +{ + /* Look up the unwind info associated with instruction-pointer IP. + On success, the routine fills in the PROC_INFO structure. */ + int (*find_proc_info)(unw_addr_space_t, unw_word_t, unw_proc_info_t *, int, + void *); + + /* Release any resources (e.g., memory) that were allocated for + the unwind info returned by a previous call to + find_proc_info() with NEED_UNWIND_INFO set to 1. */ + void (*put_unwind_info)(unw_addr_space_t, unw_proc_info_t *, void *); + + /* Return the list-head of the dynamically registered unwind + info. */ + int (*get_dyn_info_list_addr)(unw_addr_space_t, unw_word_t *, void *); + + /* Access aligned word at address ADDR.
The value is returned + according to the endianness of the host (e.g., if the host is + little-endian and the target is big-endian, access_mem() needs + to byte-swap the value before returning it). */ + int (*access_mem)(unw_addr_space_t, unw_word_t, unw_word_t *, int, void *); + + /* Access register number REG at address ADDR. */ + int (*access_reg)(unw_addr_space_t, unw_regnum_t, unw_word_t *, int, + void *); + + /* Access floating-point register number REG at address ADDR. */ + int (*access_fpreg)(unw_addr_space_t, unw_regnum_t, unw_fpreg_t *, int, + void *); + + int (*resume)(unw_addr_space_t, unw_cursor_t *, void *); + + /* Optional call back to obtain the name of a (static) procedure. + Dynamically generated procedures are handled automatically by + libunwind. This callback is optional and may be set to + NULL. */ + int (*get_proc_name)(unw_addr_space_t, unw_word_t, char *, size_t, + unw_word_t *, void *); + + /* Optional call back to obtain the name of an ELF file where the ip belongs + to. This callback is optional and may be set to NULL. */ + int (*get_elf_filename)(unw_addr_space_t, unw_word_t, char *, size_t, + unw_word_t *, void *); + + /* Optional call back to obtain the start and end ip of a procedure. + * procedure ip range is [start, end), the range is without end. + * This callback is optional and may be set to NULL. + */ + int (*get_proc_ip_range)(unw_addr_space_t, unw_word_t, unw_word_t *, + unw_word_t *, void *); + + /* Optional call back to return a mask to be used with pointer + * authentication on arm64. + * + * The on bits in the returned mask indicate which bits in a return address + * are part of a pointer authentication code. These are the bits in the + * return address to turn off so that the calling frame can be found + * for the unwinding to continue. + * + * The return value must be host-endian. e.g. if the target is big-endian + * and the host is little endian, the implementation of this function + * must byte swap.
+ * + * This callback is optional and may be set to NULL. In this case all + * the bits in the return address are used, as if no masking were done. + */ + unw_word_t (*ptrauth_insn_mask)(unw_addr_space_t, void *); + +} unw_accessors_t; + +typedef enum unw_save_loc_type +{ + UNW_SLT_NONE, /* register is not saved ("not an l-value") */ + UNW_SLT_MEMORY, /* register has been saved in memory */ + UNW_SLT_REG /* register has been saved in (another) register */ +} unw_save_loc_type_t; + +typedef struct unw_save_loc +{ + unw_save_loc_type_t type; + union + { + unw_word_t addr; /* valid if type==UNW_SLT_MEMORY */ + unw_regnum_t regnum; /* valid if type==UNW_SLT_REG */ + } u; + unw_tdep_save_loc_t extra; /* target-dependent additional information */ +} unw_save_loc_t; + +struct dl_phdr_info; +typedef int (*unw_iterate_phdr_callback_t)(struct dl_phdr_info *, size_t, + void *); +typedef int (*unw_iterate_phdr_func_t)(unw_iterate_phdr_callback_t, void *); + +/* These routines work both for local and remote unwinding. 
*/ + +#define unw_local_addr_space UNW_OBJ(local_addr_space) +#define unw_create_addr_space UNW_OBJ(create_addr_space) +#define unw_destroy_addr_space UNW_OBJ(destroy_addr_space) +#define unw_get_accessors UNW_ARCH_OBJ(get_accessors) +#define unw_get_accessors_int UNW_ARCH_OBJ(get_accessors_int) +#define unw_init_local UNW_OBJ(init_local) +#define unw_init_local2 UNW_OBJ(init_local2) +#define unw_init_remote UNW_OBJ(init_remote) +#define unw_step UNW_OBJ(step) +#define unw_resume UNW_OBJ(resume) +#define unw_get_proc_info UNW_OBJ(get_proc_info) +#define unw_get_proc_info_by_ip UNW_OBJ(get_proc_info_by_ip) +#define unw_get_proc_info_in_range UNW_OBJ(get_proc_info_in_range) +#define unw_reg_states_iterate UNW_OBJ(reg_states_iterate) +#define unw_apply_reg_state UNW_OBJ(apply_reg_state) +#define unw_get_reg UNW_OBJ(get_reg) +#define unw_set_reg UNW_OBJ(set_reg) +#define unw_get_fpreg UNW_OBJ(get_fpreg) +#define unw_set_fpreg UNW_OBJ(set_fpreg) +#define unw_get_save_loc UNW_OBJ(get_save_loc) +#define unw_is_signal_frame UNW_OBJ(is_signal_frame) +#define unw_is_plt_entry UNW_OBJ(is_plt_entry) +#define unw_get_proc_name UNW_OBJ(get_proc_name) +#define unw_get_proc_name_by_ip UNW_OBJ(get_proc_name_by_ip) +#define unw_get_elf_filename UNW_OBJ(get_elf_filename) +#define unw_get_elf_filename_by_ip UNW_OBJ(get_elf_filename_by_ip) +#define unw_set_caching_policy UNW_OBJ(set_caching_policy) +#define unw_set_cache_size UNW_OBJ(set_cache_size) +#define unw_set_iterate_phdr_function UNW_OBJ(set_iterate_phdr_function) +#define unw_regname UNW_ARCH_OBJ(regname) +#define unw_flush_cache UNW_ARCH_OBJ(flush_cache) +#define unw_strerror UNW_ARCH_OBJ(strerror) + +extern unw_addr_space_t unw_create_addr_space(unw_accessors_t *, int); +extern void unw_destroy_addr_space(unw_addr_space_t); +extern unw_accessors_t *unw_get_accessors(unw_addr_space_t); +extern unw_accessors_t *unw_get_accessors_int(unw_addr_space_t); +extern void unw_flush_cache(unw_addr_space_t, unw_word_t, unw_word_t); 
+extern int unw_set_caching_policy(unw_addr_space_t, unw_caching_policy_t); +extern int unw_set_cache_size(unw_addr_space_t, size_t, int); +extern void unw_set_iterate_phdr_function(unw_addr_space_t, + unw_iterate_phdr_func_t); +extern const char *unw_regname(unw_regnum_t); + +extern int unw_init_local(unw_cursor_t *, unw_context_t *); +extern int unw_init_local2(unw_cursor_t *, unw_context_t *, int); +extern int unw_init_remote(unw_cursor_t *, unw_addr_space_t, void *); +extern int unw_step(unw_cursor_t *); +extern int unw_resume(unw_cursor_t *); +extern int unw_get_proc_info(unw_cursor_t *, unw_proc_info_t *); +extern int unw_get_proc_info_by_ip(unw_addr_space_t, unw_word_t, + unw_proc_info_t *, void *); +extern int unw_get_proc_info_in_range(unw_word_t, unw_word_t, unw_word_t, + unw_word_t, unw_word_t, unw_word_t, + unw_addr_space_t, unw_word_t, + unw_proc_info_t *, int, void *); +extern int unw_reg_states_iterate(unw_cursor_t *, unw_reg_states_callback, + void *); +extern int unw_apply_reg_state(unw_cursor_t *, void *); +extern int unw_get_reg(unw_cursor_t *, int, unw_word_t *); +extern int unw_set_reg(unw_cursor_t *, int, unw_word_t); +extern int unw_get_fpreg(unw_cursor_t *, int, unw_fpreg_t *); +extern int unw_set_fpreg(unw_cursor_t *, int, unw_fpreg_t); +extern int unw_get_save_loc(unw_cursor_t *, int, unw_save_loc_t *); +extern int unw_is_signal_frame(unw_cursor_t *); +extern int unw_is_plt_entry(unw_cursor_t *); +extern int unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *); +extern int unw_get_proc_name_by_ip(unw_addr_space_t, unw_word_t, char *, size_t, + unw_word_t *, void *); +extern int unw_get_elf_filename(unw_cursor_t *, char *, size_t, unw_word_t *); +extern int unw_get_elf_filename_by_ip(unw_addr_space_t, unw_word_t, char *, + size_t, unw_word_t *, void *); +extern const char *unw_strerror(int); +extern int unw_backtrace(void **, int); +extern int unw_backtrace2(void **, int, unw_context_t *, int); + +extern unw_addr_space_t 
unw_local_addr_space; diff --git a/systrace/thirdparty/x86_64/libunwind/libunwind-dynamic.h b/systrace/thirdparty/x86_64/libunwind/libunwind-dynamic.h new file mode 100644 index 0000000000000000000000000000000000000000..13caf1633631ccc33d6c90ace394c539dd03f124 --- /dev/null +++ b/systrace/thirdparty/x86_64/libunwind/libunwind-dynamic.h @@ -0,0 +1,201 @@ +/* libunwind - a platform-independent unwind library + Copyright (C) 2002-2004 Hewlett-Packard Co + Contributed by David Mosberger-Tang + +This file is part of libunwind. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/* This file defines the runtime-support routines for dynamically +generated code. Even though it is implemented as part of libunwind, +it is logically separate from the interface to perform the actual +unwinding. 
In particular, this interface is always used in the +context of the unwind target, whereas the rest of the unwind API is +used in context of the process that is doing the unwind (which may be +a debugger running on another machine, for example). + +Note that the data-structures declared here serve a dual purpose: +when a program registers a dynamically generated procedure, it uses +these structures directly. On the other hand, with remote-unwinding, +the data-structures are read from the remote process's memory and +translated into internalized versions. To facilitate remote-access, +the following rules should be followed in declaring these structures: + + (1) Declare a member as a pointer only if the information the + member points to needs to be internalized as well (e.g., a + string representing a procedure name should be declared as + "const char *", but the instruction pointer should be declared + as unw_word_t). + + (2) Provide sufficient padding to ensure that no implicit padding + will be needed on any of the supported target architectures. For + the time being, padding data structures with the assumption that + sizeof (unw_word_t) == 8 should be sufficient. (Note: it's not + impossible to internalize structures with internal padding, but + it does make the process a bit harder). + + (3) Don't declare members that contain bitfields or floating-point + values. + + (4) Don't declare members with enumeration types. Declare them as + int32_t instead.
*/ + +typedef enum +{ + UNW_DYN_STOP = 0, /* end-of-unwind-info marker */ + UNW_DYN_SAVE_REG, /* save register to another register */ + UNW_DYN_SPILL_FP_REL, /* frame-pointer-relative register spill */ + UNW_DYN_SPILL_SP_REL, /* stack-pointer-relative register spill */ + UNW_DYN_ADD, /* add constant value to a register */ + UNW_DYN_POP_FRAMES, /* drop one or more stack frames */ + UNW_DYN_LABEL_STATE, /* name the current state */ + UNW_DYN_COPY_STATE, /* set the region's entry-state */ + UNW_DYN_ALIAS /* get unwind info from an alias */ +} unw_dyn_operation_t; + +typedef enum +{ + UNW_INFO_FORMAT_DYNAMIC, /* unw_dyn_proc_info_t */ + UNW_INFO_FORMAT_TABLE, /* unw_dyn_table_t */ + UNW_INFO_FORMAT_REMOTE_TABLE, /* unw_dyn_remote_table_t */ + UNW_INFO_FORMAT_ARM_EXIDX, /* ARM specific unwind info */ + UNW_INFO_FORMAT_IP_OFFSET /* Like UNW_INFO_FORMAT_REMOTE_TABLE, but + table entries are considered + relative to di->start_ip, rather + than di->segbase */ +} unw_dyn_info_format_t; + +typedef struct unw_dyn_op +{ + int8_t tag; /* what operation? */ + int8_t qp; /* qualifying predicate register */ + int16_t reg; /* what register */ + int32_t when; /* when does it take effect? */ + unw_word_t val; /* auxiliary value */ +} unw_dyn_op_t; + +typedef struct unw_dyn_region_info +{ + struct unw_dyn_region_info *next; /* linked list of regions */ + int32_t insn_count; /* region length (# of instructions) */ + uint32_t op_count; /* length of op-array */ + unw_dyn_op_t op[1]; /* variable-length op-array */ +} unw_dyn_region_info_t; + +typedef struct unw_dyn_proc_info +{ + unw_word_t name_ptr; /* address of human-readable procedure name */ + unw_word_t handler; /* address of personality routine */ + uint32_t flags; + int32_t pad0; + unw_dyn_region_info_t *regions; +} unw_dyn_proc_info_t; + +typedef struct unw_dyn_table_info +{ + unw_word_t name_ptr; /* addr. 
of table name (e.g., library name) */ + unw_word_t segbase; /* segment base */ + unw_word_t table_len; /* must be a multiple of sizeof(unw_word_t)! */ + unw_word_t *table_data; +} unw_dyn_table_info_t; + +typedef struct unw_dyn_remote_table_info +{ + unw_word_t name_ptr; /* addr. of table name (e.g., library name) */ + unw_word_t segbase; /* segment base */ + unw_word_t table_len; /* must be a multiple of sizeof(unw_word_t)! */ + unw_word_t table_data; +} unw_dyn_remote_table_info_t; + +typedef struct unw_dyn_info +{ + /* doubly-linked list of dyn-info structures: */ + struct unw_dyn_info *next; + struct unw_dyn_info *prev; + unw_word_t start_ip; /* first IP covered by this entry */ + unw_word_t end_ip; /* first IP NOT covered by this entry */ + unw_word_t gp; /* global-pointer in effect for this entry */ + int32_t format; /* real type: unw_dyn_info_format_t */ + int32_t pad; + unw_word_t load_offset; /* ELF load offset */ + union + { + unw_dyn_proc_info_t pi; + unw_dyn_table_info_t ti; + unw_dyn_remote_table_info_t rti; + } u; +} unw_dyn_info_t; + +typedef struct unw_dyn_info_list +{ + uint32_t version; + uint32_t generation; + unw_dyn_info_t *first; +} unw_dyn_info_list_t; + +/* Return the size (in bytes) of an unw_dyn_region_info_t structure that can + hold OP_COUNT ops. */ +#define _U_dyn_region_info_size(op_count) \ + ((char *)(((unw_dyn_region_info_t *)NULL)->op + (op_count)) - (char *)NULL) + +/* Register the unwind info for a single procedure. + This routine is NOT signal-safe. */ +extern void _U_dyn_register(unw_dyn_info_t *); + +/* Cancel the unwind info for a single procedure. + This routine is NOT signal-safe. */ +extern void _U_dyn_cancel(unw_dyn_info_t *); + +/* Convenience routines. 
*/ + +#define _U_dyn_op(_tag, _qp, _when, _reg, _val) \ + ((unw_dyn_op_t){(_tag), (_qp), (_reg), (_when), (_val)}) + +#define _U_dyn_op_save_reg(op, qp, when, reg, dst) \ + (*(op) = _U_dyn_op(UNW_DYN_SAVE_REG, (qp), (when), (reg), (dst))) + +#define _U_dyn_op_spill_fp_rel(op, qp, when, reg, offset) \ + (*(op) = _U_dyn_op(UNW_DYN_SPILL_FP_REL, (qp), (when), (reg), (offset))) + +#define _U_dyn_op_spill_sp_rel(op, qp, when, reg, offset) \ + (*(op) = _U_dyn_op(UNW_DYN_SPILL_SP_REL, (qp), (when), (reg), (offset))) + +#define _U_dyn_op_add(op, qp, when, reg, value) \ + (*(op) = _U_dyn_op(UNW_DYN_ADD, (qp), (when), (reg), (value))) + +#define _U_dyn_op_pop_frames(op, qp, when, num_frames) \ + (*(op) = _U_dyn_op(UNW_DYN_POP_FRAMES, (qp), (when), 0, (num_frames))) + +#define _U_dyn_op_label_state(op, label) \ + (*(op) = _U_dyn_op(UNW_DYN_LABEL_STATE, _U_QP_TRUE, -1, 0, (label))) + +#define _U_dyn_op_copy_state(op, label) \ + (*(op) = _U_dyn_op(UNW_DYN_COPY_STATE, _U_QP_TRUE, -1, 0, (label))) + +#define _U_dyn_op_alias(op, qp, when, addr) \ + (*(op) = _U_dyn_op(UNW_DYN_ALIAS, (qp), (when), 0, (addr))) + +#define _U_dyn_op_stop(op) \ + (*(op) = _U_dyn_op(UNW_DYN_STOP, _U_QP_TRUE, -1, 0, 0)) + +/* The target-dependent qualifying predicate which is always TRUE. On + IA-64, that's p0 (0), on non-predicated architectures, the value is + ignored. */ +#define _U_QP_TRUE _U_TDEP_QP_TRUE diff --git a/systrace/thirdparty/x86_64/libunwind/libunwind-x86_64.h b/systrace/thirdparty/x86_64/libunwind/libunwind-x86_64.h new file mode 100644 index 0000000000000000000000000000000000000000..e9fc8177ed8618f5fdc85b588a6247c273186f28 --- /dev/null +++ b/systrace/thirdparty/x86_64/libunwind/libunwind-x86_64.h @@ -0,0 +1,146 @@ +/* libunwind - a platform-independent unwind library + Copyright (C) 2002-2004 Hewlett-Packard Co + Contributed by David Mosberger-Tang + + Modified for x86_64 by Max Asbock + +This file is part of libunwind. 
+ +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef LIBUNWIND_H +#define LIBUNWIND_H + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" +{ +#endif + +#include <inttypes.h> +#include <stdint.h> /* uint8_t, uint64_t, UINT64_MAX */ +#include <sys/types.h> +#include <ucontext.h> /* ucontext_t, used as unw_tdep_context_t */ + +#ifndef UNW_EMPTY_STRUCT +#define UNW_EMPTY_STRUCT uint8_t unused; +#endif + +#define UNW_TARGET x86_64 +#define UNW_TARGET_X86_64 1 + +#define _U_TDEP_QP_TRUE 0 /* see libunwind-dynamic.h */ + +/* This needs to be big enough to accommodate "struct cursor", while + leaving some slack for future expansion. Changing this value will + require recompiling all users of this library. Stack allocation is + relatively cheap and unwind-state copying is relatively rare, so we + want to err on making it rather too big than too small.
*/ +#define UNW_TDEP_CURSOR_LEN 127 + + typedef uint64_t unw_word_t; + typedef int64_t unw_sword_t; + + typedef long double unw_tdep_fpreg_t; + +#define UNW_WORD_MAX UINT64_MAX + + typedef enum + { + UNW_X86_64_RAX, + UNW_X86_64_RDX, + UNW_X86_64_RCX, + UNW_X86_64_RBX, + UNW_X86_64_RSI, + UNW_X86_64_RDI, + UNW_X86_64_RBP, + UNW_X86_64_RSP, + UNW_X86_64_R8, + UNW_X86_64_R9, + UNW_X86_64_R10, + UNW_X86_64_R11, + UNW_X86_64_R12, + UNW_X86_64_R13, + UNW_X86_64_R14, + UNW_X86_64_R15, + UNW_X86_64_RIP, +#ifdef CONFIG_MSABI_SUPPORT + UNW_X86_64_XMM0, + UNW_X86_64_XMM1, + UNW_X86_64_XMM2, + UNW_X86_64_XMM3, + UNW_X86_64_XMM4, + UNW_X86_64_XMM5, + UNW_X86_64_XMM6, + UNW_X86_64_XMM7, + UNW_X86_64_XMM8, + UNW_X86_64_XMM9, + UNW_X86_64_XMM10, + UNW_X86_64_XMM11, + UNW_X86_64_XMM12, + UNW_X86_64_XMM13, + UNW_X86_64_XMM14, + UNW_X86_64_XMM15, + UNW_TDEP_LAST_REG = UNW_X86_64_XMM15, +#else + UNW_TDEP_LAST_REG = UNW_X86_64_RIP, +#endif + + /* XXX Add other regs here */ + + /* frame info (read-only) */ + UNW_X86_64_CFA, + + UNW_TDEP_IP = UNW_X86_64_RIP, + UNW_TDEP_SP = UNW_X86_64_RSP, + UNW_TDEP_BP = UNW_X86_64_RBP, + UNW_TDEP_EH = UNW_X86_64_RAX + } x86_64_regnum_t; + +#define UNW_TDEP_NUM_EH_REGS 2 /* XXX Not sure what this means */ + + typedef struct unw_tdep_save_loc + { + /* Additional target-dependent info on a save location. */ + UNW_EMPTY_STRUCT + } unw_tdep_save_loc_t; + + /* On x86_64, we can directly use ucontext_t as the unwind context. 
*/ + typedef ucontext_t unw_tdep_context_t; + + typedef struct + { + /* no x86-64-specific auxiliary proc-info */ + UNW_EMPTY_STRUCT + } unw_tdep_proc_info_t; + +#include "libunwind-common.h" +#include "libunwind-dynamic.h" + +#define unw_tdep_getcontext UNW_ARCH_OBJ(getcontext) +#define unw_tdep_is_fpreg UNW_ARCH_OBJ(is_fpreg) + + extern int unw_tdep_getcontext(unw_tdep_context_t *); + extern int unw_tdep_is_fpreg(int); + +#if defined(__cplusplus) || defined(c_plusplus) +} +#endif + +#endif /* LIBUNWIND_H */ diff --git a/systrace/thirdparty/x86_64/libunwind/libunwind.h b/systrace/thirdparty/x86_64/libunwind/libunwind.h new file mode 100644 index 0000000000000000000000000000000000000000..db092c7bcddb366ad8c21359af108547322c9c78 --- /dev/null +++ b/systrace/thirdparty/x86_64/libunwind/libunwind.h @@ -0,0 +1,40 @@ +/* Provide a real file - not a symlink - as it would cause multiarch conflicts + when multiple different arch releases are installed simultaneously. */ + +#ifndef UNW_REMOTE_ONLY + +#if defined __aarch64__ +#include "libunwind-aarch64.h" +#elif defined __arm__ +#include "libunwind-arm.h" +#elif defined __hppa__ +#include "libunwind-hppa.h" +#elif defined __ia64__ +#include "libunwind-ia64.h" +#elif defined __mips__ +#include "libunwind-mips.h" +#elif defined __powerpc__ && !defined __powerpc64__ +#include "libunwind-ppc32.h" +#elif defined __powerpc64__ +#include "libunwind-ppc64.h" +#elif defined __sh__ +#include "libunwind-sh.h" +#elif defined __i386__ +#include "libunwind-x86.h" +#elif defined __x86_64__ +#include "libunwind-x86_64.h" +#elif defined __s390x__ +#include "libunwind-s390x.h" +#elif defined __riscv || defined __riscv__ +#include "libunwind-riscv.h" +#elif defined __loongarch64 +#include "libunwind-loongarch64.h" +#else +#error "Unsupported arch" +#endif + +#else /* UNW_REMOTE_ONLY */ + +#include "libunwind-x86_64.h" + +#endif /* UNW_REMOTE_ONLY */ diff --git a/systrace/thirdparty/x86_64/libunwind/unwind.h 
b/systrace/thirdparty/x86_64/libunwind/unwind.h new file mode 100644 index 0000000000000000000000000000000000000000..69201dc8929eb8fcb5c63d059ca538c5fc4273a4 --- /dev/null +++ b/systrace/thirdparty/x86_64/libunwind/unwind.h @@ -0,0 +1,158 @@ +/* libunwind - a platform-independent unwind library + Copyright (C) 2003 Hewlett-Packard Co + Contributed by David Mosberger-Tang + +This file is part of libunwind. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#ifndef _UNWIND_H +#define _UNWIND_H + +/* For uint64_t */ +#include <stdint.h> +#include <stdalign.h> /* alignas(), used on struct _Unwind_Exception */ + +#ifdef __cplusplus +extern "C" +{ +#endif + + /* Minimal interface as per C++ ABI draft standard: + + http://www.codesourcery.com/cxx-abi/abi-eh.html */ + + typedef enum + { + _URC_NO_REASON = 0, + _URC_FOREIGN_EXCEPTION_CAUGHT = 1, + _URC_FATAL_PHASE2_ERROR = 2, + _URC_FATAL_PHASE1_ERROR = 3, + _URC_NORMAL_STOP = 4, + _URC_END_OF_STACK = 5, + _URC_HANDLER_FOUND = 6, + _URC_INSTALL_CONTEXT = 7, + _URC_CONTINUE_UNWIND = 8 + } _Unwind_Reason_Code; + + typedef int _Unwind_Action; + +#define _UA_SEARCH_PHASE 1 +#define _UA_CLEANUP_PHASE 2 +#define _UA_HANDLER_FRAME 4 +#define _UA_FORCE_UNWIND 8 + + struct _Unwind_Context; /* opaque data-structure */ + struct _Unwind_Exception; /* forward-declaration */ + + typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code, + struct _Unwind_Exception *); + + typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action, + uint64_t, + struct _Unwind_Exception *, + struct _Unwind_Context *, + void *); + + /* The C++ ABI requires exception_class, private_1, and private_2 to + be of type uint64 and the entire structure to be + double-word-aligned. Please note that exception_class stays 64-bit + even on 32-bit machines for gcc compatibility.
*/ + struct _Unwind_Exception + { + alignas(8) uint64_t exception_class; + _Unwind_Exception_Cleanup_Fn exception_cleanup; + unsigned long private_1; + unsigned long private_2; + }; + + extern _Unwind_Reason_Code + _Unwind_RaiseException(struct _Unwind_Exception *); + extern _Unwind_Reason_Code _Unwind_ForcedUnwind(struct _Unwind_Exception *, + _Unwind_Stop_Fn, void *); + extern void _Unwind_Resume(struct _Unwind_Exception *); + extern void _Unwind_DeleteException(struct _Unwind_Exception *); + extern unsigned long _Unwind_GetGR(struct _Unwind_Context *, int); + extern void _Unwind_SetGR(struct _Unwind_Context *, int, unsigned long); + extern unsigned long _Unwind_GetIP(struct _Unwind_Context *); + extern unsigned long _Unwind_GetIPInfo(struct _Unwind_Context *, int *); + extern void _Unwind_SetIP(struct _Unwind_Context *, unsigned long); + extern unsigned long + _Unwind_GetLanguageSpecificData(struct _Unwind_Context *); + extern unsigned long _Unwind_GetRegionStart(struct _Unwind_Context *); + +#ifdef _GNU_SOURCE + + /* Callback for _Unwind_Backtrace(). The backtrace stops immediately + if the callback returns any value other than _URC_NO_REASON. */ + typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *, + void *); + +/* See http://gcc.gnu.org/ml/gcc-patches/2001-09/msg00082.html for why + _UA_END_OF_STACK exists. */ +#define _UA_END_OF_STACK 16 + + /* If the unwind was initiated due to a forced unwind, resume that + operation, else re-raise the exception. This is used by + __cxa_rethrow(). */ + extern _Unwind_Reason_Code + _Unwind_Resume_or_Rethrow(struct _Unwind_Exception *); + + /* See http://gcc.gnu.org/ml/gcc-patches/2003-09/msg00154.html for why + _Unwind_GetBSP() exists. */ + extern unsigned long _Unwind_GetBSP(struct _Unwind_Context *); + + /* Return the "canonical frame address" for the given context. + This is used by NPTL... 
*/ + extern unsigned long _Unwind_GetCFA(struct _Unwind_Context *); + + /* Return the base-address for data references. */ + extern unsigned long _Unwind_GetDataRelBase(struct _Unwind_Context *); + + /* Return the base-address for text references. */ + extern unsigned long _Unwind_GetTextRelBase(struct _Unwind_Context *); + + /* Call _Unwind_Trace_Fn once for each stack-frame, without doing any + cleanup. The first frame for which the callback is invoked is the + one for the caller of _Unwind_Backtrace(). _Unwind_Backtrace() + returns _URC_END_OF_STACK when the backtrace stopped due to + reaching the end of the call-chain or _URC_FATAL_PHASE1_ERROR if it + stops for any other reason. */ + extern _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *); + + /* Find the start-address of the procedure containing the specified IP + or NULL if it cannot be found (e.g., because the function has no + unwind info). Note: there is not necessarily a one-to-one + correspondence between source-level functions and procedures: some + functions don't have unwind-info and others are split into multiple + procedures. */ + extern void *_Unwind_FindEnclosingFunction(void *); + + /* See also Linux Standard Base Spec: + http://www.linuxbase.org/spec/refspecs/LSB_1.3.0/gLSB/gLSB/libgcc-s.html + */ + +#endif /* _GNU_SOURCE */ + +#ifdef __cplusplus +}; +#endif + +#endif /* _UNWIND_H */ diff --git a/systrace/thirdparty/x86_64/mspti/include/mspti.h b/systrace/thirdparty/x86_64/mspti/include/mspti.h new file mode 100644 index 0000000000000000000000000000000000000000..e83c454c11cb784c7a22f82f50127f2f9d2a368c --- /dev/null +++ b/systrace/thirdparty/x86_64/mspti/include/mspti.h @@ -0,0 +1,19 @@ +/** + * @file mspti.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef MSPTI_H +#define MSPTI_H + +#include "mspti_activity.h" +#include "mspti_callback.h" +#include "mspti_cbid.h" +#include "mspti_result.h" + +#endif diff --git a/systrace/thirdparty/x86_64/mspti/include/mspti_activity.h b/systrace/thirdparty/x86_64/mspti/include/mspti_activity.h new file mode 100644 index 0000000000000000000000000000000000000000..30f71598d073b9637c9ec440939f30f65ef30e74 --- /dev/null +++ b/systrace/thirdparty/x86_64/mspti/include/mspti_activity.h @@ -0,0 +1,424 @@ +/** + * @file mspti_activity.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef MSPTI_ACTIVITY_H +#define MSPTI_ACTIVITY_H + +#define ACTIVITY_STRUCT_ALIGNMENT 8 +#if defined(_WIN32) +#define START_PACKED_ALIGNMENT __pragma(pack(push, 1)) +#define PACKED_ALIGNMENT __declspec(align(ACTIVITY_STRUCT_ALIGNMENT)) +#define END_PACKED_ALIGNMENT __pragma(pack(pop)) +#elif defined(__GNUC__) +#define START_PACKED_ALIGNMENT +#define PACKED_ALIGNMENT \ + __attribute__((__packed__)) \ + __attribute__((aligned(ACTIVITY_STRUCT_ALIGNMENT))) +#define END_PACKED_ALIGNMENT +#else +#define START_PACKED_ALIGNMENT +#define PACKED_ALIGNMENT +#define END_PACKED_ALIGNMENT +#endif + +#include "mspti_result.h" +#include <stddef.h> /* size_t */ +#include <stdint.h> /* uint8_t, uint32_t, uint64_t */ + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#if defined(__GNUC__) && defined(MSPTI_LIB) +#pragma GCC visibility push(default) +#endif + + /** + * @brief The kinds of activity records. + * + * Each kind is associated with an + * activity record structure that holds the information associated + * with the kind.
+ */ + typedef enum + { + /** + * The activity record is invalid. + */ + MSPTI_ACTIVITY_KIND_INVALID = 0, + MSPTI_ACTIVITY_KIND_MARKER = 1, + MSPTI_ACTIVITY_KIND_KERNEL = 2, + MSPTI_ACTIVITY_KIND_API = 3, + MSPTI_ACTIVITY_KIND_COUNT, + MSPTI_ACTIVITY_KIND_FORCE_INT = 0x7fffffff + } msptiActivityKind; + + /** + * @brief The source kinds of mark data. + * + * Each mark activity record kind represents information about host or + * device + */ + typedef enum + { + MSPTI_ACTIVITY_SOURCE_KIND_HOST = 0, + MSPTI_ACTIVITY_SOURCE_KIND_DEVICE = 1 + } msptiActivitySourceKind; + + /** + * @brief Flags linked to activity records. + * + * These are the Flags that pertain to activity records. + * Flags can be combined by bitwise OR to + * associated multiple flags with an activity record. + */ + typedef enum + { + /** + * Signifies that the activity record lacks any flags. + */ + MSPTI_ACTIVITY_FLAG_NONE = 0, + /** + * Represents the activity as a pure host instantaneous marker. Works + * with MSPTI_ACTIVITY_KIND_MARKER. + */ + MSPTI_ACTIVITY_FLAG_MARKER_INSTANTANEOUS = 1 << 0, + /** + * Represents the activity as a pure host region start marker. Works + * with MSPTI_ACTIVITY_KIND_MARKER. + */ + MSPTI_ACTIVITY_FLAG_MARKER_START = 1 << 1, + /** + * Represents the activity as a pure host region end marker. Works with + * MSPTI_ACTIVITY_KIND_MARKER. + */ + MSPTI_ACTIVITY_FLAG_MARKER_END = 1 << 2, + /** + * Represents the activity as an instantaneous marker with device. Works + * with MSPTI_ACTIVITY_KIND_MARKER. + */ + MSPTI_ACTIVITY_FLAG_MARKER_INSTANTANEOUS_WITH_DEVICE = 1 << 3, + /** + * Represents the activity as a pure start marker with device. Works + * with MSPTI_ACTIVITY_KIND_MARKER. + */ + MSPTI_ACTIVITY_FLAG_MARKER_START_WITH_DEVICE = 1 << 4, + /** + * Represents the activity as a pure end marker with device. Works with + * MSPTI_ACTIVITY_KIND_MARKER. 
+ */ + MSPTI_ACTIVITY_FLAG_MARKER_END_WITH_DEVICE = 1 << 5 + } msptiActivityFlag; + + START_PACKED_ALIGNMENT + + typedef struct PACKED_ALIGNMENT + { + msptiActivityKind kind; + } msptiActivity; + + typedef union PACKED_ALIGNMENT + { + /** + * A thread object requires that we identify both the process and + * thread ID. + */ + struct + { + uint32_t processId; + uint32_t threadId; + } pt; + /** + * A stream object requires that we identify device and stream ID. + */ + struct + { + uint32_t deviceId; + uint32_t streamId; + } ds; + } msptiObjectId; + + /** + * @brief This activity record serves as a marker, representing a specific + * moment in time. + * + * The marker is characterized by a distinctive name and a unique identifier + */ + typedef struct PACKED_ALIGNMENT + { + /** + * The activity record kind, always be MSPTI_ACTIVITY_KIND_MARKER. + */ + msptiActivityKind kind; + + /** + * The flags associated with the marker. + * @see msptiActivityFlag + */ + msptiActivityFlag flag; + + /** + * The source kinds of mark data. + * @see msptiActivitySourceKind + */ + msptiActivitySourceKind sourceKind; + + /** + * The timestamp for the marker, in ns. A value of 0 indicates that + * timestamp information could not be collected for the marker. + */ + uint64_t timestamp; + + /** + * The marker ID. + */ + uint64_t id; + + /** + * The identifier for the activity object associated with this + * marker. 'objectKind' indicates which ID is valid for this record. + */ + msptiObjectId objectId; + + /** + * The marker name for an instantaneous or start marker. + * This will be NULL for an end marker. + */ + const char *name; + + /** + * The name of the domain to which this marker belongs to. + * This will be NULL for default domain. + */ + const char *domain; + } msptiActivityMarker; + + typedef struct PACKED_ALIGNMENT + { + /** + * The activity record kind, must be MSPTI_ACTIVITY_KIND_API. + */ + msptiActivityKind kind; + + /** + * The start timestamp for the api, in ns. 
+ */ + uint64_t start; + + /** + * The end timestamp for the api, in ns. + */ + uint64_t end; + + /** + * A thread object requires that we identify both the process and + * thread ID. + */ + struct + { + uint32_t processId; + uint32_t threadId; + } pt; + + /** + * The correlation ID of the kernel. + */ + uint64_t correlationId; + + /** + * The api name. + */ + const char *name; + } msptiActivityApi; + + typedef struct PACKED_ALIGNMENT + { + /** + * The activity record kind, must be MSPTI_ACTIVITY_KIND_KERNEL. + */ + msptiActivityKind kind; + + /** + * The start timestamp for the kernel, in ns. + */ + uint64_t start; + + /** + * The end timestamp for the kernel, in ns. + */ + uint64_t end; + + /** + * A stream object requires that we identify device and stream ID. + */ + struct + { + uint32_t deviceId; + uint32_t streamId; + } ds; + + /** + * The correlation ID of the kernel. + */ + uint64_t correlationId; + + /** + * The kernel type. + */ + const char *type; + + /** + * The kernel name. + */ + const char *name; + } msptiActivityKernel; + + END_PACKED_ALIGNMENT + + /** + * @brief Function type for callback used by MSPTI to request an empty + * buffer for storing activity records. + * + * This callback function signals the MSPTI client that an activity + * buffer is needed by MSPTI. The activity buffer is used by MSPTI to + * store activity records. The callback function can decline the + * request by setting **buffer to NULL. In this case MSPTI may drop + * activity records. + * + * @param buffer Returns the new buffer. If set to NULL then no buffer + * is returned. + * @param size Returns the size of the returned buffer. + * @param maxNumRecords Returns the maximum number of records that + * should be placed in the buffer. If 0 then the buffer is filled with + * as many records as possible. If > 0 the buffer is filled with at + * most that many records before it is returned. 
+ */ + typedef void (*msptiBuffersCallbackRequestFunc)(uint8_t **buffer, + size_t *size, + size_t *maxNumRecords); + + /** + * @brief Function type for callback used by MSPTI to return a buffer + * of activity records. + * + * This callback function returns to the MSPTI client a buffer + * containing activity records. The buffer contains @p validSize + * bytes of activity records which should be read using + * msptiActivityGetNextRecord. After this call MSPTI + * relinquished ownership of the buffer and will not use it + * anymore. The client may return the buffer to MSPTI using the + * msptiBuffersCallbackRequestFunc callback. + * + * @param buffer The activity record buffer. + * @param size The total size of the buffer in bytes as set in + * MSPTI_BuffersCallbackRequestFunc. + * @param validSize The number of valid bytes in the buffer. + */ + typedef void (*msptiBuffersCallbackCompleteFunc)(uint8_t *buffer, + size_t size, + size_t validSize); + + /** + * @brief Registers callback functions with MSPTI for activity buffer + * handling. + * + * This function registers two callback functions to be used in asynchronous + * buffer handling. If registered, activity record buffers are handled using + * asynchronous requested/completed callbacks from MSPTI. + * + * @param funcBufferRequested callback which is invoked when an empty + * buffer is requested by MSPTI + * @param funcBufferCompleted callback which is invoked when a buffer + * containing activity records is available from MSPTI + * + * @retval MSPTI_SUCCESS + * @retval MSPTI_ERROR_INVALID_PARAMETER if either + * funcBufferRequested or funcBufferCompleted is NULL + */ + msptiResult msptiActivityRegisterCallbacks( + msptiBuffersCallbackRequestFunc funcBufferRequested, + msptiBuffersCallbackCompleteFunc funcBufferCompleted); + + /** + * @brief Enable collection of a specific kind of activity record. + * + * Enable collection of a specific kind of activity record. 
Multiple + * kinds can be enabled by calling this function multiple times. + * By default, the collection of all activity types is inactive. + * + * @param kind The kind of activity record to collect + * + * @retval MSPTI_SUCCESS + */ + msptiResult msptiActivityEnable(msptiActivityKind kind); + + /** + * @brief Disable collection of a specific kind of activity record. + * + * Disable collection of a specific kind of activity record. Multiple + * kinds can be disabled by calling this function multiple times. + * By default, the collection of all activity types is inactive. + * + * @param kind The kind of activity record to stop collecting + * + * @retval MSPTI_SUCCESS + */ + msptiResult msptiActivityDisable(msptiActivityKind kind); + + /** + * @brief Iterate over the activity records in a buffer. + * + * This is a function to iterate over the activity records in buffer. + * + * @param buffer The buffer containing activity records + * @param validBufferSizeBytes The number of valid bytes in the buffer. + * @param record Inputs the previous record returned by + * msptiActivityGetNextRecord and returns the next activity record + * from the buffer. If input value is NULL, returns the first activity + * record in the buffer. + * + * @retval MSPTI_SUCCESS + * @retval MSPTI_ERROR_MAX_LIMIT_REACHED if no more records in the buffer + * @retval MSPTI_ERROR_INVALID_PARAMETER if buffer is NULL. + */ + msptiResult msptiActivityGetNextRecord(uint8_t *buffer, + size_t validBufferSizeBytes, + msptiActivity **record); + + /** + * @brief Request to deliver activity records via the buffer completion + * callback. + * + * This function returns the activity records associated with all + * contexts/streams (and the global buffers not associated with any stream) + * to the MSPTI client using the callback registered in + * msptiActivityRegisterCallbacks. It returns all activity buffers that + * contain completed activity records, even if these buffers are not + * completely filled.
+ * + * Before calling this function, the buffer handling callback api must be + * activated by calling msptiActivityRegisterCallbacks. + * + * @param flag Reserved for internal use. + * + * @retval MSPTI_SUCCESS + */ + msptiResult msptiActivityFlushAll(uint32_t flag); + +#if defined(__GNUC__) && defined(MSPTI_LIB) +#pragma GCC visibility pop +#endif + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/systrace/thirdparty/x86_64/mspti/include/mspti_callback.h b/systrace/thirdparty/x86_64/mspti/include/mspti_callback.h new file mode 100644 index 0000000000000000000000000000000000000000..2e6f7ee2264b9e99f5f891fdc6ac3cd20d53bf66 --- /dev/null +++ b/systrace/thirdparty/x86_64/mspti/include/mspti_callback.h @@ -0,0 +1,258 @@ +/** + * @file mspti_callback.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef MSPTI_CALLBACK_H +#define MSPTI_CALLBACK_H + +#include "mspti_cbid.h" +#include "mspti_result.h" +#include <stdint.h> /* uint32_t, uint64_t */ + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#if defined(__GNUC__) && defined(MSPTI_LIB) +#pragma GCC visibility push(default) +#endif + + /** + * @brief Callback domains. + * + * Callback domains. Each domain represents callback points for a + * group of related API functions or CANN driver activity. + */ + typedef enum + { + /** + * Invalid domain. + */ + MSPTI_CB_DOMAIN_INVALID = 0, + /** + * Domain containing callback points for all runtime API functions. + */ + MSPTI_CB_DOMAIN_RUNTIME = 1, + MSPTI_CB_DOMAIN_HCCL = 2, + MSPTI_CB_DOMAIN_SIZE, + MSPTI_CB_DOMAIN_FORCE_INT = 0x7fffffff + } msptiCallbackDomain; + + typedef uint32_t msptiCallbackId; + + /** + * @brief Specifies the point in an API call that a callback is issued. + * + * Specifies the point in an API call that a callback is issued.
This + * value is communicated to the callback function by @ref + * msptiCallbackData::callbackSite. + */ + typedef enum + { + /** + * The callback is at the entry of the API call. + */ + MSPTI_API_ENTER = 0, + /** + * The callback is at the exit of the API call. + */ + MSPTI_API_EXIT = 1, + MSPTI_API_CBSITE_FORCE_INT = 0x7fffffff + } msptiApiCallbackSite; + + typedef struct + { + /** + * Point in the runtime or driver function from where the callback + * was issued. + */ + msptiApiCallbackSite callbackSite; + + /** + * Name of the runtime or driver API function which issued the + * callback. + */ + const char *functionName; + + /** + * Params of the runtime or driver API function which issued the + * callback. + */ + const void *functionParams; + + /** + * Pointer to the return value of the runtime or driver API + * call. + */ + const void *functionReturnValue; + + /** + * Name of the symbol operated on by the runtime or driver API + * function which issued the callback. This entry is valid only for + * driver and runtime launch callbacks, where it returns the name of + * the kernel. + */ + const char *symbolName; + + /** + * The activity record correlation ID for this callback. For a + * driver domain callback (i.e. @p domain + * MSPTI_CB_DOMAIN_DRIVER_API) this ID will equal the correlation ID + * in the MSPTI_ActivityAPI record corresponding to the CANN driver + * function call. For a runtime domain callback (i.e. @p domain + * MSPTI_CB_DOMAIN_RUNTIME_API) this ID will equal the correlation + * ID in the MSPTI_ActivityAPI record corresponding to the CANN + * runtime function call. Within the callback, this ID can be + * recorded to correlate user data with the activity record. + */ + uint64_t correlationId; + + /** + * Undefined. Reserved for internal use. + */ + uint64_t reserved1; + + /** + * Undefined. Reserved for internal use. 
+ */ + uint64_t reserved2; + + /** + * Pointer to data shared between the entry and exit callbacks of + * a given runtime or driver API function invocation. This field + * can be used to pass 64-bit values from the entry callback to + * the corresponding exit callback. + */ + uint64_t *correlationData; + } msptiCallbackData; + + /** + * @brief Function type for a callback. + * + * Function type for a callback. The type of the data passed to the + * callback in @p cbdata depends on the @p domain. If @p domain is + * MSPTI_CB_DOMAIN_RUNTIME the type + * of @p cbdata will be msptiCallbackData. + * + * @param userdata User data supplied at subscription of the callback + * @param domain The domain of the callback + * @param cbid The ID of the callback + * @param cbdata Data passed to the callback. + */ + typedef void (*msptiCallbackFunc)(void *userdata, + msptiCallbackDomain domain, + msptiCallbackId cbid, + const msptiCallbackData *cbdata); + + struct msptiSubscriber_st; + + /** + * @brief A callback subscriber. + */ + typedef struct msptiSubscriber_st *msptiSubscriberHandle; + + /** + * @brief Initialize a callback subscriber with a callback function + * and user data. + * + * Initializes a callback subscriber with a callback function and + * (optionally) a pointer to user data. The returned subscriber handle + * can be used to enable and disable the callback for specific domains + * and callback IDs. + * @note Only a single subscriber can be registered at a time. To ensure + * that no other MSPTI client interrupts the profiling session, it's the + * responsibility of all the MSPTI clients to call this function before + * starting the profiling session. + * @note This function does not enable any callbacks. + * @note @b Thread-safety: this function is thread safe. + * + * @param subscriber handle to initialize subscriber + * @param callback The callback function + * @param userdata A pointer to user data.
This data will be passed to + * the callback function via the @p userdata parameter. + * + * @retval MSPTI_SUCCESS on success + * @retval MSPTI_ERROR_INNER if unable to initialize MSPTI + * @retval MSPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED if there is + * already a MSPTI subscriber + * @retval MSPTI_ERROR_INVALID_PARAMETER if @p subscriber is NULL + */ + msptiResult msptiSubscribe(msptiSubscriberHandle *subscriber, + msptiCallbackFunc callback, void *userdata); + + /** + * @brief Unregister a callback subscriber. + * + * Removes a callback subscriber so that no future callbacks will be + * issued to that subscriber. + * + * @param subscriber Handle to the initialized subscriber + * + * @retval MSPTI_SUCCESS on success + * @retval MSPTI_ERROR_INVALID_PARAMETER if @p subscriber is NULL or not + * initialized + */ + msptiResult msptiUnsubscribe(msptiSubscriberHandle subscriber); + + /** + * @brief Enable or disable callbacks for a specific domain and + * callback ID. + * + * Enable or disable callbacks for a subscriber for a specific domain + * and callback ID. + * + * @note @b Thread-safety: a subscriber must serialize access to + * msptiEnableCallback, msptiEnableDomain. + * + * @param enable New enable state for the callback. Zero disables the + * callback, non-zero enables the callback. + * @param subscriber Handle to callback subscription + * @param domain The domain of the callback + * @param cbid The ID of the callback + * + * @retval MSPTI_SUCCESS on success + * @retval MSPTI_ERROR_INVALID_PARAMETER if @p subscriber, @p domain or @p + * cbid is invalid. + */ + msptiResult msptiEnableCallback(uint32_t enable, + msptiSubscriberHandle subscriber, + msptiCallbackDomain domain, + msptiCallbackId cbid); + + /** + * @brief Enable or disable callbacks for a specific domain + * + * Enable or disable callbacks for a subscriber for a specific domain + * + * @note @b Thread-safety: a subscriber must serialize access to + * msptiEnableCallback, msptiEnableDomain.
+ * + * @param enable New enable state for the callback. Zero disables the + * callback, non-zero enables the callback. + * @param subscriber Handle to callback subscription + * @param domain The domain of the callback + * + * @retval MSPTI_SUCCESS on success + * @retval MSPTI_ERROR_INVALID_PARAMETER if @p subscriber, @p domain is + * invalid. + */ + msptiResult msptiEnableDomain(uint32_t enable, + msptiSubscriberHandle subscriber, + msptiCallbackDomain domain); + +#if defined(__GNUC__) && defined(MSPTI_LIB) +#pragma GCC visibility pop +#endif + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/systrace/thirdparty/x86_64/mspti/include/mspti_cbid.h b/systrace/thirdparty/x86_64/mspti/include/mspti_cbid.h new file mode 100644 index 0000000000000000000000000000000000000000..540ad394376e5a9f6bb74fb0a53c9072a24b1a9c --- /dev/null +++ b/systrace/thirdparty/x86_64/mspti/include/mspti_cbid.h @@ -0,0 +1,83 @@ +/** + * @file mspti_cbid.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#ifndef MSPTI_CBID_H +#define MSPTI_CBID_H + +/** + * @brief Definitions of indices for Runtime API functions, unique across entire + * API + */ +typedef enum +{ + MSPTI_CBID_RUNTIME_INVALID = 0, + MSPTI_CBID_RUNTIME_DEVICE_SET = 1, + MSPTI_CBID_RUNTIME_DEVICE_RESET = 2, + MSPTI_CBID_RUNTIME_DEVICE_SET_EX = 3, + MSPTI_CBID_RUNTIME_CONTEXT_CREATED_EX = 4, + MSPTI_CBID_RUNTIME_CONTEXT_CREATED = 5, + MSPTI_CBID_RUNTIME_CONTEXT_DESTROY = 6, + MSPTI_CBID_RUNTIME_STREAM_CREATED = 7, + MSPTI_CBID_RUNTIME_STREAM_DESTROY = 8, + MSPTI_CBID_RUNTIME_STREAM_SYNCHRONIZED = 9, + MSPTI_CBID_RUNTIME_LAUNCH = 10, + MSPTI_CBID_RUNTIME_CPU_LAUNCH = 11, + MSPTI_CBID_RUNTIME_AICPU_LAUNCH = 12, + MSPTI_CBID_RUNTIME_AIV_LAUNCH = 13, + MSPTI_CBID_RUNTIME_FFTS_LAUNCH = 14, + MSPTI_CBID_RUNTIME_MALLOC = 15, + MSPTI_CBID_RUNTIME_FREE = 16, + MSPTI_CBID_RUNTIME_MALLOC_HOST = 17, + MSPTI_CBID_RUNTIME_FREE_HOST = 18, + MSPTI_CBID_RUNTIME_MALLOC_CACHED = 19, + MSPTI_CBID_RUNTIME_FLUSH_CACHE = 20, + MSPTI_CBID_RUNTIME_INVALID_CACHE = 21, + MSPTI_CBID_RUNTIME_MEMCPY = 22, + MSPTI_CBID_RUNTIME_MEMCPY_HOST = 23, + MSPTI_CBID_RUNTIME_MEMCPY_ASYNC = 24, + MSPTI_CBID_RUNTIME_MEM_CPY2D = 25, + MSPTI_CBID_RUNTIME_MEM_CPY2D_ASYNC = 26, + MSPTI_CBID_RUNTIME_MEM_SET = 27, + MSPTI_CBID_RUNTIME_MEM_SET_ASYNC = 28, + MSPTI_CBID_RUNTIME_MEM_GET_INFO = 29, + MSPTI_CBID_RUNTIME_RESERVE_MEM_ADDRESS = 30, + MSPTI_CBID_RUNTIME_RELEASE_MEM_ADDRESS = 31, + MSPTI_CBID_RUNTIME_MALLOC_PHYSICAL = 32, + MSPTI_CBID_RUNTIME_FREE_PHYSICAL = 33, + MSPTI_CBID_RUNTIME_MEM_EXPORT_TO_SHAREABLE_HANDLE = 34, + MSPTI_CBID_RUNTIME_MEM_IMPORT_FROM_SHAREABLE_HANDLE = 35, + MSPTI_CBID_RUNTIME_MEM_SET_PID_TO_SHAREABLE_HANDLE = 36, + MSPTI_CBID_RUNTIME_SIZE, + MSPTI_CBID_RUNTIME_FORCE_INT = 0x7fffffff +} msptiCallbackIdRuntime; + +/** + * @brief Definitions of indices for hccl API functions + */ +typedef enum +{ + MSPTI_CBID_HCCL_INVALID = 0, + MSPTI_CBID_HCCL_ALLREDUCE = 1, + MSPTI_CBID_HCCL_BROADCAST = 2, + 
MSPTI_CBID_HCCL_ALLGATHER = 3, + MSPTI_CBID_HCCL_REDUCE_SCATTER = 4, + MSPTI_CBID_HCCL_REDUCE = 5, + MSPTI_CBID_HCCL_ALL_TO_ALL = 6, + MSPTI_CBID_HCCL_ALL_TO_ALLV = 7, + MSPTI_CBID_HCCL_BARRIER = 8, + MSPTI_CBID_HCCL_SCATTER = 9, + MSPTI_CBID_HCCL_SEND = 10, + MSPTI_CBID_HCCL_RECV = 11, + MSPTI_CBID_HCCL_SENDRECV = 12, + MSPTI_CBID_HCCL_SIZE, + MSPTI_CBID_HCCL_FORCE_INT = 0x7fffffff +} msptiCallbackIdHccl; + +#endif diff --git a/systrace/thirdparty/x86_64/mspti/include/mspti_result.h b/systrace/thirdparty/x86_64/mspti/include/mspti_result.h new file mode 100644 index 0000000000000000000000000000000000000000..902647eed2e5efc7b69f2d2dd865e228d4a22d0e --- /dev/null +++ b/systrace/thirdparty/x86_64/mspti/include/mspti_result.h @@ -0,0 +1,30 @@ +/** + * @file mspti_result.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef MSPTI_BASE_H +#define MSPTI_BASE_H + +/** + * @brief MSPTI result codes. + * + * Error and result codes returned by MSPTI functions. 
+ */ +typedef enum +{ + MSPTI_SUCCESS = 0, + MSPTI_ERROR_INVALID_PARAMETER = 1, + MSPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED = 2, + MSPTI_ERROR_MAX_LIMIT_REACHED = 3, + MSPTI_ERROR_DEVICE_OFFLINE = 4, + MSPTI_ERROR_INNER = 999, + MSPTI_ERROR_FOECE_INT = 0x7fffffff +} msptiResult; + +#endif diff --git a/systrace/thirdparty/x86_64/mspti/lib64/libmspti.so b/systrace/thirdparty/x86_64/mspti/lib64/libmspti.so new file mode 100644 index 0000000000000000000000000000000000000000..79f2ec422f26585fd16f8b9ff95318447f8458fc Binary files /dev/null and b/systrace/thirdparty/x86_64/mspti/lib64/libmspti.so differ diff --git a/systrace/watchdog/watchdog.py b/systrace/watchdog/watchdog.py new file mode 100644 index 0000000000000000000000000000000000000000..5d594523692df0efd882bbeff5a3adaba1d21b56 --- /dev/null +++ b/systrace/watchdog/watchdog.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +""" +系统监控脚本 v1.0 +功能:双配置文件监控、环境变量更新、进程逃生管理 +""" +import os +import json +import time +import signal +import logging +import argparse +import subprocess +import threading +from schedule import every, run_pending +from datetime import datetime + +# 配置全局参数 +DEFAULT_TRACE_FILE = "trace.json" +DEFAULT_LOOP_TIME = 3 +DEFAULT_LOG_FILE = "log.json" +ENV_MAPPING = { + "sysTraceD_L0": "sysTraceD_L0", + "sysTraceD_L1": "sysTraceD_L1", + "sysTraceD_L2": "sysTraceD_L2", + "sysTraceD_L3": "sysTraceD_L3", + "escape_switch": "sysTrace_Escape" +} + +class SystemMonitor: + def __init__(self, trace_file): + self.trace_file = trace_file + self.sysTrace_pid = None + self._setup_logging() + self._parse_args() + self.start_sysTrace() + + def _setup_logging(self): + """配置日志记录""" + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s: %(message)s', + handlers=[logging.FileHandler('monitor.log'), logging.StreamHandler()] + ) + + def _parse_args(self): + """解析命令行参数""" + parser = argparse.ArgumentParser(description='System Monitor Daemon') + parser.add_argument('--trace', type=str, 
default=DEFAULT_TRACE_FILE, + help='Path to trace config (default: trace.json)') + parser.add_argument('--tracelog', type=str, default=DEFAULT_LOG_FILE, + help='Path to trace config (default: log.json)') + self.args = parser.parse_args() + + def start_sysTrace(self): + """启动监控进程""" + try: + proc = subprocess.Popen(["sysTrace"], stdout=subprocess.PIPE) + self.sysTrace_pid = proc.pid + logging.info(f"Started sysTrace (PID: {self.sysTrace_pid})") + except FileNotFoundError: + logging.error("sysTrace executable not found in PATH") + + def kill_sysTrace(self): + """终止监控进程""" + if self.sysTrace_pid and os.path.exists(f"/proc/{self.sysTrace_pid}"): + os.kill(self.sysTrace_pid, signal.SIGTERM) + logging.info(f"Killed sysTrace (PID: {self.sysTrace_pid})") + self.sysTrace_pid = None + + @staticmethod + def validate_json(file_path): + """验证JSON文件格式""" + try: + with open(file_path) as f: + json.load(f) + return True + except (json.JSONDecodeError, FileNotFoundError) as e: + logging.error(f"Invalid JSON {file_path}: {str(e)}") + return False + + def check_systrace_stream(self, file_path, buffer_size=4096): + """内存优化的流式检查""" + target = "i am sysTrace" + window = "" + + try: + with open(file_path, 'r') as f: + while (chunk := f.read(buffer_size)) : + window += chunk + if target in window: + logging.info(f"find str") + return True + # 保留可能跨分块的尾部字符 + window = window[-len(target):] if len(window) > len(target) else window + return False + except FileNotFoundError: + print(f"文件 {file_path} 不存在") + return False + + def update_env_vars(self): + """更新环境变量""" + if not self.validate_json(self.args.trace): + return + try: + with open(self.args.trace, 'r') as f: + data = json.load(f) + for key, env_var in ENV_MAPPING.items(): + value = str(data.get(key, "false")).lower() + os.environ[env_var] = value + logging.info(f"Set {env_var}={value}") + except Exception as e: + logging.error(f"Env update failed: {str(e)}") + + def check_escape_trigger(self): + """检查逃生开关""" + if not 
self.validate_json(self.args.trace): + return + try: + with open(self.args.trace, 'r') as f: + data = json.load(f) + if data.get("escape_switch", False) is True: + self.kill_sysTrace() + except Exception as e: + logging.error(f"Escape check failed: {str(e)}") + + def scheduler_task(self): + """定时任务调度""" + every(DEFAULT_LOOP_TIME).seconds.do(self.update_env_vars) + every(DEFAULT_LOOP_TIME).seconds.do(self.check_escape_trigger) + every(DEFAULT_LOOP_TIME).seconds.do(self.check_systrace_stream, self.args.tracelog) + while True: + run_pending() + time.sleep(1) + + def run(self): + """主运行循环""" + threading.Thread(target=self.scheduler_task, daemon=True).start() + try: + while True: + time.sleep(3600) + except KeyboardInterrupt: + self.kill_sysTrace() + logging.info("Service stopped") + +if __name__ == "__main__": + monitor = SystemMonitor(DEFAULT_TRACE_FILE) + monitor.run() \ No newline at end of file