diff --git a/systrace/.clang-format b/systrace/.clang-format
new file mode 100644
index 0000000000000000000000000000000000000000..466d8df6a1d2760cdbcf4a051bb4edba51530154
--- /dev/null
+++ b/systrace/.clang-format
@@ -0,0 +1,5 @@
+BasedOnStyle: LLVM  # start from the LLVM preset; the keys below override it
+IndentWidth: 4
+BreakBeforeBraces: Allman  # opening braces are placed on their own line
+UseTab: Never
+TabWidth: 4
diff --git a/systrace/CMakeLists.txt b/systrace/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4856e4521330306949be0053c0c38c80745478c1
--- /dev/null
+++ b/systrace/CMakeLists.txt
@@ -0,0 +1,70 @@
+cmake_minimum_required(VERSION 3.12) # FindPython3, used below, first shipped with CMake 3.12
+project(sysTrace)
+# Project-wide defaults: C++17, position-independent code, and no RPATH in any binary.
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+set(CMAKE_SKIP_RPATH TRUE)
+set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
+set(CMAKE_INSTALL_RPATH "")
+set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64")
+    set(UNWIND_LIB "unwind-aarch64")
+    set(MSPTI_INCLUDE "${PROJECT_SOURCE_DIR}/thirdparty/aarch64/mspti/include")
+    set(MSPTI_LIB "${PROJECT_SOURCE_DIR}/thirdparty/aarch64/mspti/lib64")
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64")
+    set(UNWIND_LIB "unwind")
+    set(MSPTI_INCLUDE "${PROJECT_SOURCE_DIR}/thirdparty/x86_64/mspti/include")
+    set(MSPTI_LIB "${PROJECT_SOURCE_DIR}/thirdparty/x86_64/mspti/lib64")
+else()
+    message(WARNING "Unknown architecture: ${CMAKE_SYSTEM_PROCESSOR}")
+    set(UNWIND_LIB "unwind")
+endif()
+include_directories(
+    ${MSPTI_INCLUDE}
+)
+# Third-party packages; each one must be present or configuration stops.
+find_package(Python3 REQUIRED COMPONENTS Development)
+find_package(Protobuf REQUIRED)
+find_package(Threads REQUIRED)
+# MSPTI is searched in the per-architecture directory chosen above. NOTE(review): ASCEND_MSPTI is only checked here and never linked below - confirm it is loaded at runtime.
+find_library(ASCEND_MSPTI
+    NAMES mspti
+    HINTS ${MSPTI_LIB}
+)
+if(NOT ASCEND_MSPTI)
+    message(FATAL_ERROR "Ascend mspti library not found!")
+endif()
+# Static helper library (logging and utilities); its include paths are PUBLIC so dependents inherit them.
+add_library(common STATIC
+    ${PROJECT_SOURCE_DIR}/include/common/logging.cc
+    ${PROJECT_SOURCE_DIR}/include/common/util.cc
+)
+target_include_directories(common PUBLIC ${PROJECT_SOURCE_DIR}/include ${Python3_INCLUDE_DIRS})
+
+# protos/ is expected to define the general_pb2 target that is linked below.
+add_subdirectory(protos)
+# The tracing hook itself, built as a shared library with the output name "sysTrace".
+add_library(sysTrace_hook SHARED
+    ${PROJECT_SOURCE_DIR}/src/trace/systrace_manager.cc
+    ${PROJECT_SOURCE_DIR}/src/trace/library_loader.cc
+    ${PROJECT_SOURCE_DIR}/src/trace/python/pytorch_tracing_loader.cc
+    ${PROJECT_SOURCE_DIR}/src/trace/python/pytorch_tracing_manager.cc
+    ${PROJECT_SOURCE_DIR}/src/trace/python/pytorch_tracing.c
+    ${PROJECT_SOURCE_DIR}/src/ascend/hook.cc
+    ${PROJECT_SOURCE_DIR}/src/mspti/mspti_tracker.cpp
+    ${PROJECT_SOURCE_DIR}/src/cann/cann_hook.c
+)
+
+set_target_properties(sysTrace_hook PROPERTIES OUTPUT_NAME "sysTrace")
+# Threads::Threads replaces the unset ${CMAKE_THREAD_LIBS} and the raw pthread; CMAKE_DL_LIBS replaces -ldl.
+target_link_libraries(sysTrace_hook
+    PUBLIC
+    common
+    general_pb2
+    ${Python3_LIBRARIES}
+    protobuf::libprotobuf
+    Threads::Threads
+    jsoncpp
+    ${CMAKE_DL_LIBS}
+)
diff --git a/systrace/build.sh b/systrace/build.sh
new file mode 100644
index 0000000000000000000000000000000000000000..3040c97c7af451e8fb35e834b1f0b20a0cb8be67
--- /dev/null
+++ b/systrace/build.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -euo pipefail  # stop at the first failing step instead of running later commands in the wrong directory
+sudo dnf remove -y libunwind libunwind-devel 2>/dev/null || true  # best effort: failure is ignored
+mkdir -p build
+# Generate the C, C++ and Python bindings for systrace.proto next to the .proto file.
+cd protos
+protoc --c_out=. systrace.proto
+protoc --cpp_out=. systrace.proto
+protoc --python_out=. systrace.proto
+cd ..
+cd build
+cmake ..
+make -j "$(nproc)"
diff --git a/systrace/convert/convert_json2csv.py b/systrace/convert/convert_json2csv.py
new file mode 100644
index 0000000000000000000000000000000000000000..55b8ea24f25b8592ae894929788089ae7c671ec1
--- /dev/null
+++ b/systrace/convert/convert_json2csv.py
@@ -0,0 +1,55 @@
+# coding=utf-8
+"""
+Copyright (c) Huawei Technologies Co., Ltd. 2020-2028. All rights reserved.
+Description:
+FileName: convert_json2csv.py
+Author: h00568282/huangbin 
+Create Date: 2025/3/28 16:17
+Notes:
+
+"""
+import os
+import json
+import pandas as pd
+from util.logging_utils import get_default_logger
+
+logger = get_default_logger(__name__)
+
+
+def convert_json2csv(json_path):
+    """Convert one JSON dump into a CSV file next to it; do nothing if that CSV already exists."""
+    csv_path = f"{os.path.splitext(json_path)[0]}.csv"
+    if os.path.exists(csv_path):
+        return
+    try:
+        with open(json_path, 'r', encoding='utf-8') as file:
+            # The dump may hold several arrays written back to back ("]\n["); join them into one array.
+            content = file.read().replace(']\n[', ',').strip()
+        json_data = json.loads(content)
+    except (OSError, ValueError):
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError; anything else now propagates.
+        logger.error("json data read error")
+        json_data = None
+    # Nothing to write for an unreadable or empty document.
+    if not json_data:
+        return
+    df = pd.json_normalize(json_data, sep='_')
+    logger.info(f"save path: {csv_path}")
+    df.to_csv(csv_path, index=False)
+
+
+def convert_jsons2csv(root_path):
+    """Run convert_json2csv on every ``*.json`` file located directly inside ``root_path``."""
+    json_files = [file for file in os.listdir(root_path) if file.endswith(".json")]
+    for json_file in json_files:
+        logger.info(f"{json_file}")
+        json_path = os.path.join(root_path, json_file)
+        convert_json2csv(json_path)
+
+
+if __name__ == "__main__":
+    # Converts every JSON file found in the hard-coded sample directory below.
+    # For a single file, call convert_json2csv(json_path) directly instead.
+
+    root_path = "./data/json_tp4dp1"
+    convert_jsons2csv(root_path)
\ No newline at end of file
diff --git a/systrace/convert/convert_mem_to_flamegraph.py b/systrace/convert/convert_mem_to_flamegraph.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd80b78b075b73955c728b802398f1687b6888e5
--- /dev/null
+++ b/systrace/convert/convert_mem_to_flamegraph.py
@@ -0,0 +1,275 @@
+#!/usr/bin/env python3
+import sys
+import json
+import os
+import subprocess
+from collections import defaultdict, deque
+from concurrent.futures import ThreadPoolExecutor
+from systrace_pb2 import ProcMem, StageType
+
+class FixedFlameGraphConverter:
+    def __init__(self):
+        """Set up the stage-name table, the two lookup caches and the symbolization thread pool."""
+        gc_stage = getattr(StageType, "STAGE_GC", 5)  # falls back to 5 when the enum has no STAGE_GC
+        self.stage_names = {
+            StageType.STAGE_UNKNOWN: "UNKNOWN", StageType.STAGE_DATALOADER: "DATALOADER",
+            StageType.STAGE_FORWARD: "FORWARD", StageType.STAGE_BACKWARD: "BACKWARD",
+            StageType.STAGE_SYNCHRONIZATION: "SYNCHRONIZATION", gc_stage: "GC",
+        }
+        # symbol_cache: "<so_name>:<hex address>" -> symbol; so_path_cache: so_name -> path or None.
+        self.symbol_cache, self.so_path_cache = {}, {}
+        worker_count = os.cpu_count() or 4
+        self.executor = ThreadPoolExecutor(max_workers=worker_count)
+
+    def convert(self, input_pb, output_json):
+        """Convert a serialized ProcMem file into a Chrome-trace style flame graph JSON.
+
+        The time axis is synthetic: every allocation occupies ``mem_size`` ticks, so widths
+        are proportional to allocated bytes. Stages are laid out one after another.
+        """
+        try:
+            proc_mem = self._load_proc_mem(input_pb)
+            alloc_groups = self._analyze_allocations(proc_mem)
+            self._precache_symbols(alloc_groups)
+
+            # Group allocations by display name and remember the key that produced each name.
+            stage_data = defaultdict(list)
+            stage_keys = {}
+            for (stage_type, stage_id), allocs in alloc_groups.items():
+                stage_name = f"{stage_id}_{self.stage_names.get(stage_type, 'UNKNOWN')}"
+                stage_data[stage_name].extend(allocs)
+                stage_keys.setdefault(stage_name, (stage_type, stage_id))
+
+            trace_events = []
+            global_timestamp = 0
+            for stage_name, allocs in stage_data.items():
+                stage_type, stage_id = stage_keys[stage_name]
+                min_ts = global_timestamp
+                max_ts = global_timestamp + sum(a.mem_size for a in allocs)
+                # Container event spanning the whole stage; _merge_calls keeps it first.
+                stage_events = [{
+                    "name": stage_name,
+                    "ph": "X",
+                    "ts": min_ts,
+                    "dur": max_ts - min_ts,
+                    "pid": proc_mem.pid,
+                    "tid": proc_mem.pid,
+                    "args": {
+                        # Describe this stage (previously the first group was always reported).
+                        "stage_type": self.stage_names.get(stage_type, "UNKNOWN"),
+                        "stage_id": stage_id,
+                        "is_container": True
+                    }
+                }]
+                # Lay the allocations out back to back inside the stage.
+                current_ts = min_ts
+                for alloc in allocs:
+                    alloc_events, _ = self._process_allocation(alloc, proc_mem.pid, current_ts)
+                    stage_events.extend(alloc_events)
+                    current_ts += alloc.mem_size
+                # Merge adjacent calls that share depth and name.
+                trace_events.extend(self._merge_calls(stage_events, stage_name))
+                global_timestamp = max_ts
+
+            self._save_json(output_json, trace_events)
+        finally:
+            self.executor.shutdown()
+
+    def _merge_calls(self, events, stage_name):
+        """Merge same-named calls at the same depth of one stage when they are adjacent in time."""
+        # Split the container event from the call events.
+        container = [e for e in events if e.get("args", {}).get("is_container")][0]
+        calls = [e for e in events if not e.get("args", {}).get("is_container")]
+        # NOTE: the [0] above raises IndexError when events holds no container; stage_name is unused here.
+        # Group the calls by (depth, name).
+        call_groups = defaultdict(list)
+        for e in calls:
+            key = (e["args"]["depth"], e["name"])
+            call_groups[key].append(e)
+
+        # Merge the calls of each group.
+        merged_calls = []
+        for (depth, name), group in call_groups.items():
+            if len(group) == 1:
+                merged_calls.extend(group)
+                continue
+            # Walk the group in start-time order, growing "current" while the next event starts where it ends.
+            group.sort(key=lambda x: x["ts"])
+            current = dict(group[0])
+            # dict() is a shallow copy: current["args"] is the same dict object as group[0]["args"].
+            for e in group[1:]:
+                if e["ts"] == current["ts"] + current["dur"]:
+                    current["dur"] += e["dur"]
+                    current["args"]["bytes"] += e["args"]["bytes"]
+                    if "merged_ptrs" not in current["args"]:
+                        current["args"]["merged_ptrs"] = [current["args"]["alloc_ptr"]]
+                    current["args"]["merged_ptrs"].append(e["args"]["alloc_ptr"])
+                else:
+                    if "merged_ptrs" in current["args"]:
+                        current["args"]["alloc_ptr"] = ",".join(current["args"]["merged_ptrs"])
+                        del current["args"]["merged_ptrs"]
+                    merged_calls.append(current)
+                    current = dict(e)
+            # Flush the last run; its merged pointers are joined into one comma-separated string.
+            if "merged_ptrs" in current["args"]:
+                current["args"]["alloc_ptr"] = ",".join(current["args"]["merged_ptrs"])
+                del current["args"]["merged_ptrs"]
+            merged_calls.append(current)
+
+        # Container first, followed by the calls ordered by start time.
+        return [container] + sorted(merged_calls, key=lambda x: x["ts"])
+
+    def _process_allocation(self, alloc, pid, base_ts):
+        """Build the trace events for one allocation record.
+
+        The call stack is a single chain, so it is emitted as one synthetic ``[root]``
+        event followed by one event per stack frame, in frame order. Every event starts
+        at ``base_ts`` and lasts ``alloc.mem_size`` ticks; ``depth`` is 0 for the root
+        and grows by one per frame.
+
+        Args:
+            alloc: allocation record providing ``mem_size``, ``alloc_ptr`` and
+                ``stack_frames`` (each frame has ``so_name`` and ``address``).
+            pid: value written to both ``pid`` and ``tid`` of every event.
+            base_ts: start timestamp shared by all events of this allocation.
+
+        Returns:
+            Tuple ``(events, duration)`` where ``duration`` equals ``alloc.mem_size``.
+
+        Note:
+            Each event gets its own ``args`` dict; ``_merge_calls`` updates these
+            dicts in place, so they must not be shared between events.
+        """
+
+        alloc_duration = alloc.mem_size
+        # Formatted once; identical for every event of this allocation.
+        alloc_ptr = f"0x{alloc.alloc_ptr:x}"
+
+        # Resolve frame names up front; only the basename of the library is used.
+        names = ["[root]"]
+        for frame in alloc.stack_frames:
+            so_name = os.path.basename(frame.so_name)
+            names.append(self._resolve_symbol(so_name, frame.address))
+
+        # The previous implementation built a one-child-per-node tree, summed child
+        # durations recursively and then walked it. For a chain that yields the same
+        # duration for every node, so the events are produced directly; this also
+        # avoids hitting the recursion limit on very deep stacks.
+        call_events = []
+        for depth, name in enumerate(names):
+            call_events.append({
+                "name": name,
+                "ph": "X",
+                "ts": base_ts,
+                "dur": alloc_duration,
+                "pid": pid,
+                "tid": pid,
+                "args": {
+                    "depth": depth,
+                    "bytes": alloc.mem_size,
+                    "alloc_ptr": alloc_ptr
+                }
+            })
+
+        return call_events, alloc_duration
+
+    def _load_proc_mem(self, path):
+        """Read the binary file at ``path`` and parse its contents into a ProcMem message."""
+        with open(path, "rb") as f:
+            proc_mem = ProcMem()
+            proc_mem.ParseFromString(f.read())
+            return proc_mem
+
+    def _analyze_allocations(self, proc_mem):
+        """Group all allocation records by ``(stage_type, stage_id)``; freed ones are kept as well."""
+        # The unused set of freed pointers was dropped: the filter that consumed it is disabled.
+        groups = defaultdict(list)
+        for alloc in proc_mem.mem_alloc_stacks:
+            groups[(alloc.stage_type, alloc.stage_id)].append(alloc)
+        return groups
+
+    def _precache_symbols(self, alloc_groups):
+        """Resolve every distinct (library basename, address) frame once, using the thread pool."""
+        pending = {
+            (os.path.basename(frame.so_name), frame.address)
+            for records in alloc_groups.values() for record in records for frame in record.stack_frames
+        }
+        # list() drains the iterator, which waits for all lookups; results land in self.symbol_cache.
+        list(self.executor.map(lambda pair: self._resolve_symbol(pair[0], pair[1]), pending))
+
+    def _resolve_symbol(self, so_name, address):
+        """Return ``"<so_name>@<function>"`` for an address, or ``"<so_name>@0x<addr>"`` if unresolved."""
+        cache_key = f"{so_name}:{address:x}"
+        if cache_key in self.symbol_cache:
+            return self.symbol_cache[cache_key]
+        fallback = f"{so_name}@0x{address:x}"
+        so_path = self._find_so_path(so_name)
+        if not so_path:
+            self.symbol_cache[cache_key] = fallback
+            return fallback
+        try:
+            result = subprocess.run(
+                ["addr2line", "-e", so_path, "-f", "-C", "-p", f"0x{address:x}"],
+                capture_output=True, text=True, timeout=0.05
+            )
+            func_name = result.stdout.split(" at ")[0].split("(")[0].strip() if result.returncode == 0 else ""
+            # addr2line prints "??" for a function it cannot name; treat that as unresolved too.
+            symbol = f"{so_name}@{func_name}" if func_name and not func_name.startswith("??") else fallback
+        except (OSError, subprocess.SubprocessError):
+            # addr2line is missing, not executable, or exceeded the timeout: keep the raw address.
+            symbol = fallback
+        self.symbol_cache[cache_key] = symbol
+        return symbol
+
+    def _find_so_path(self, so_name):
+        """Locate ``so_name`` on disk; returns the path or None, and caches either outcome."""
+        if so_name in self.so_path_cache:
+            return self.so_path_cache[so_name]
+        if os.path.isabs(so_name) and os.path.exists(so_name):
+            self.so_path_cache[so_name] = so_name
+            return so_name
+        base_name = os.path.basename(so_name)
+        search_paths = [
+            "/usr/lib", "/usr/local/lib", "/lib",
+            *os.getenv("LD_LIBRARY_PATH", "").split(":"),
+            *os.getenv("PATH", "").split(":"),
+            # lib64 directories are searched last, so every lookup that succeeded before is unchanged.
+            "/usr/lib64", "/usr/local/lib64", "/lib64",
+        ]
+        # Empty entries produced by split(":") are dropped by the isdir filter.
+        for path in filter(os.path.isdir, search_paths):
+            test_path = os.path.join(path, base_name)
+            if os.path.exists(test_path):
+                self.so_path_cache[so_name] = test_path
+                return test_path
+            # Otherwise try the unversioned name and the suffixes .1 to .5 in this directory.
+            if base_name.startswith("lib") and ".so" in base_name:
+                lib_prefix = base_name.split(".so")[0]
+                for ext in ["", ".1", ".2", ".3", ".4", ".5"]:
+                    test_path = os.path.join(path, f"{lib_prefix}.so{ext}")
+                    if os.path.exists(test_path):
+                        self.so_path_cache[so_name] = test_path
+                        return test_path
+        self.so_path_cache[so_name] = None
+        return None
+
+    def _save_json(self, path, trace_events):
+        """Write the events, sorted by ``ts``, as trace JSON to ``path``.
+
+        When ``path`` is a directory, the file name is derived from ``sys.argv[1]``.
+        """
+        if os.path.isdir(path):
+            stem, _ext = os.path.splitext(os.path.basename(sys.argv[1]))
+            path = os.path.join(path, f"{stem}_fixed_flamegraph.json")
+
+        with open(path, "w") as out_file:
+            ordered_events = sorted(trace_events, key=lambda event: event["ts"])
+            metadata = {"format": "FixedFlameGraph", "stage_order": list(self.stage_names.values())}
+            document = {"traceEvents": ordered_events, "displayTimeUnit": "ns", "metadata": metadata}
+            json.dump(document, out_file, indent=2)
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:  # exactly two arguments: the input .pb file and the output path
+        print(f"Usage: python {sys.argv[0]} input.pb output.json")
+        sys.exit(1)
+    FixedFlameGraphConverter().convert(sys.argv[1], sys.argv[2])
\ No newline at end of file
diff --git a/systrace/convert/convert_mem_to_flamegraph_for_cur.py b/systrace/convert/convert_mem_to_flamegraph_for_cur.py
new file mode 100644
index 0000000000000000000000000000000000000000..38800260ed6049416ef7ae5a8872e8e95e543fbd
--- /dev/null
+++ b/systrace/convert/convert_mem_to_flamegraph_for_cur.py
@@ -0,0 +1,305 @@
+#!/usr/bin/env python3
+import sys
+import json
+import os
+import subprocess
+from collections import defaultdict, deque
+from concurrent.futures import ThreadPoolExecutor
+from systrace_pb2 import ProcMem, StageType
+
+class FixedFlameGraphConverter:
+    def __init__(self):
+        """Set up the stage-name table, the two lookup caches and the symbolization thread pool."""
+        gc_stage = getattr(StageType, "STAGE_GC", 5)  # falls back to 5 when the enum has no STAGE_GC
+        self.stage_names = {
+            StageType.STAGE_UNKNOWN: "UNKNOWN", StageType.STAGE_DATALOADER: "DATALOADER",
+            StageType.STAGE_FORWARD: "FORWARD", StageType.STAGE_BACKWARD: "BACKWARD",
+            StageType.STAGE_SYNCHRONIZATION: "SYNCHRONIZATION", gc_stage: "GC",
+        }
+        # symbol_cache: "<so_name>:<hex address>" -> symbol; so_path_cache: so_name -> path or None.
+        self.symbol_cache, self.so_path_cache = {}, {}
+        worker_count = os.cpu_count() or 4
+        self.executor = ThreadPoolExecutor(max_workers=worker_count)
+
+    def convert(self, input_pb, output_json):
+        """Convert a serialized ProcMem file into flame-graph trace JSON with per-stage memory totals.
+
+        Each stage gets a container event (tid 1) carrying the bytes allocated and freed in the
+        stage and the memory still held after it. The time axis is synthetic: one tick per byte.
+        ``input_pb`` is the path of the serialized ProcMem file; ``output_json`` is the output
+        file, or a directory (see ``_save_json``).
+        """
+        try:
+            proc_mem = self._load_proc_mem(input_pb)
+            alloc_groups = self._analyze_allocations(proc_mem)
+            self._precache_symbols(alloc_groups)
+
+            # Bytes freed per stage; a free is charged to the stage in which the free happened.
+            alloc_records = {alloc.alloc_ptr: alloc for alloc in proc_mem.mem_alloc_stacks}
+            freed_by_stage = defaultdict(int)
+            for free in proc_mem.mem_free_stacks:
+                if free.alloc_ptr in alloc_records:
+                    freed_by_stage[(free.stage_type, free.stage_id)] += alloc_records[free.alloc_ptr].mem_size
+
+            # Group allocations by display name and remember the key that produced each name.
+            stage_data = defaultdict(list)
+            stage_keys = {}
+            for (stage_type, stage_id), allocs in alloc_groups.items():
+                stage_name = f"{stage_id}_{self.stage_names.get(stage_type, 'UNKNOWN')}"
+                stage_data[stage_name].extend(allocs)
+                stage_keys.setdefault(stage_name, (stage_type, stage_id))
+
+            # Running total of held memory: allocations minus frees, stage after stage.
+            cumulative_alloc = 0
+            stage_alloc_info = {}
+            for stage_name, allocs in stage_data.items():
+                current_alloc = sum(a.mem_size for a in allocs)
+                current_free = freed_by_stage[stage_keys[stage_name]]
+                # Each stage's allocation is counted once (it used to be added a second time).
+                cumulative_alloc += current_alloc - current_free
+                stage_alloc_info[stage_name] = {
+                    'allocated': current_alloc,
+                    'freed': current_free,
+                    'held': max(cumulative_alloc, 0)  # never report a negative amount
+                }
+
+            trace_events = []
+            current_ts = 0
+            for stage_name, allocs in stage_data.items():
+                # Stages with id 0, 1 and 2 are left out of the output.
+                if stage_name.startswith(("0_", "1_", "2_")):
+                    continue
+                stage_type, stage_id = stage_keys[stage_name]
+                info = stage_alloc_info[stage_name]
+                min_ts = current_ts
+                max_ts = min_ts + sum(a.mem_size for a in allocs)
+                stage_events = [{
+                    "name": stage_name,
+                    "ph": "X",
+                    "ts": min_ts,
+                    # NOTE(review): held bytes divided by 1e7, unlike the call events whose
+                    # duration is in bytes - confirm this scale is intended.
+                    "dur": info['held'] / 10000000,
+                    "pid": proc_mem.pid,
+                    "tid": 1,
+                    "args": {
+                        # Describe this stage (previously the first group was always reported).
+                        "stage_type": self.stage_names.get(stage_type, "UNKNOWN"),
+                        "stage_id": stage_id,
+                        "is_container": True,
+                        "allocated": info['allocated'],
+                        "freed": info['freed'],
+                        "held": info['held']
+                    }
+                }]
+                # Lay the allocations out back to back inside the stage.
+                alloc_start_ts = min_ts
+                for alloc in allocs:
+                    alloc_events, _ = self._process_allocation(alloc, proc_mem.pid, alloc_start_ts)
+                    stage_events.extend(alloc_events)
+                    alloc_start_ts += alloc.mem_size
+                trace_events.extend(self._merge_calls(stage_events, stage_name))
+                current_ts = max_ts
+
+            self._save_json(output_json, trace_events)
+        finally:
+            self.executor.shutdown()
+
+    def _merge_calls(self, events, stage_name):
+        """Merge same-named calls at the same depth of one stage when they are adjacent in time."""
+        # Split the container event from the call events.
+        container = [e for e in events if e.get("args", {}).get("is_container")][0]
+        calls = [e for e in events if not e.get("args", {}).get("is_container")]
+        # NOTE: the [0] above raises IndexError when events holds no container; stage_name is unused here.
+        # Group the calls by (depth, name).
+        call_groups = defaultdict(list)
+        for e in calls:
+            key = (e["args"]["depth"], e["name"])
+            call_groups[key].append(e)
+
+        # Merge the calls of each group.
+        merged_calls = []
+        for (depth, name), group in call_groups.items():
+            if len(group) == 1:
+                merged_calls.extend(group)
+                continue
+            # Walk the group in start-time order, growing "current" while the next event starts where it ends.
+            group.sort(key=lambda x: x["ts"])
+            current = dict(group[0])
+            # dict() is a shallow copy: current["args"] is the same dict object as group[0]["args"].
+            for e in group[1:]:
+                if e["ts"] == current["ts"] + current["dur"]:
+                    current["dur"] += e["dur"]
+                    current["args"]["bytes"] += e["args"]["bytes"]
+                    if "merged_ptrs" not in current["args"]:
+                        current["args"]["merged_ptrs"] = [current["args"]["alloc_ptr"]]
+                    current["args"]["merged_ptrs"].append(e["args"]["alloc_ptr"])
+                else:
+                    if "merged_ptrs" in current["args"]:
+                        current["args"]["alloc_ptr"] = ",".join(current["args"]["merged_ptrs"])
+                        del current["args"]["merged_ptrs"]
+                    merged_calls.append(current)
+                    current = dict(e)
+            # Flush the last run; its merged pointers are joined into one comma-separated string.
+            if "merged_ptrs" in current["args"]:
+                current["args"]["alloc_ptr"] = ",".join(current["args"]["merged_ptrs"])
+                del current["args"]["merged_ptrs"]
+            merged_calls.append(current)
+
+        # Container first, followed by the calls ordered by start time.
+        return [container] + sorted(merged_calls, key=lambda x: x["ts"])
+
+    def _process_allocation(self, alloc, pid, base_ts):
+        """Build the trace events for one allocation record.
+
+        The call stack is a single chain, so it is emitted as one synthetic ``[root]``
+        event followed by one event per stack frame, in frame order. Every event starts
+        at ``base_ts`` and lasts ``alloc.mem_size`` ticks; ``depth`` is 0 for the root
+        and grows by one per frame.
+
+        Args:
+            alloc: allocation record providing ``mem_size``, ``alloc_ptr`` and
+                ``stack_frames`` (each frame has ``so_name`` and ``address``).
+            pid: value written to ``pid`` of every event; ``tid`` is the constant 2.
+            base_ts: start timestamp shared by all events of this allocation.
+
+        Returns:
+            Tuple ``(events, duration)`` where ``duration`` equals ``alloc.mem_size``.
+
+        Note:
+            Each event gets its own ``args`` dict; ``_merge_calls`` updates these
+            dicts in place, so they must not be shared between events.
+        """
+
+        alloc_duration = alloc.mem_size
+        # Formatted once; identical for every event of this allocation.
+        alloc_ptr = f"0x{alloc.alloc_ptr:x}"
+
+        # Resolve frame names up front; only the basename of the library is used.
+        names = ["[root]"]
+        for frame in alloc.stack_frames:
+            so_name = os.path.basename(frame.so_name)
+            names.append(self._resolve_symbol(so_name, frame.address))
+
+        # The previous implementation built a one-child-per-node tree, summed child
+        # durations recursively and then walked it. For a chain that yields the same
+        # duration for every node, so the events are produced directly; this also
+        # avoids hitting the recursion limit on very deep stacks.
+        call_events = []
+        for depth, name in enumerate(names):
+            call_events.append({
+                "name": name,
+                "ph": "X",
+                "ts": base_ts,
+                "dur": alloc_duration,
+                "pid": pid,
+                "tid": 2,
+                "args": {
+                    "depth": depth,
+                    "bytes": alloc.mem_size,
+                    "alloc_ptr": alloc_ptr
+                }
+            })
+
+        return call_events, alloc_duration
+
+    def _load_proc_mem(self, path):
+        """Read the binary file at ``path`` and parse its contents into a ProcMem message."""
+        with open(path, "rb") as f:
+            proc_mem = ProcMem()
+            proc_mem.ParseFromString(f.read())
+            return proc_mem
+
+    def _analyze_allocations(self, proc_mem):
+        """Group all allocation records by ``(stage_type, stage_id)``; freed ones are kept as well."""
+        # The unused set of freed pointers was dropped: the filter that consumed it is disabled.
+        groups = defaultdict(list)
+        for alloc in proc_mem.mem_alloc_stacks:
+            groups[(alloc.stage_type, alloc.stage_id)].append(alloc)
+        return groups
+
+    def _precache_symbols(self, alloc_groups):
+        """Resolve every distinct (library basename, address) frame once, using the thread pool."""
+        pending = {
+            (os.path.basename(frame.so_name), frame.address)
+            for records in alloc_groups.values() for record in records for frame in record.stack_frames
+        }
+        # list() drains the iterator, which waits for all lookups; results land in self.symbol_cache.
+        list(self.executor.map(lambda pair: self._resolve_symbol(pair[0], pair[1]), pending))
+
+    def _resolve_symbol(self, so_name, address):
+        """Return ``"<so_name>@<function>"`` for an address, or ``"<so_name>@0x<addr>"`` if unresolved."""
+        cache_key = f"{so_name}:{address:x}"
+        if cache_key in self.symbol_cache:
+            return self.symbol_cache[cache_key]
+        fallback = f"{so_name}@0x{address:x}"
+        so_path = self._find_so_path(so_name)
+        if not so_path:
+            self.symbol_cache[cache_key] = fallback
+            return fallback
+        try:
+            result = subprocess.run(
+                ["addr2line", "-e", so_path, "-f", "-C", "-p", f"0x{address:x}"],
+                capture_output=True, text=True, timeout=0.05
+            )
+            func_name = result.stdout.split(" at ")[0].split("(")[0].strip() if result.returncode == 0 else ""
+            # addr2line prints "??" for a function it cannot name; treat that as unresolved too.
+            symbol = f"{so_name}@{func_name}" if func_name and not func_name.startswith("??") else fallback
+        except (OSError, subprocess.SubprocessError):
+            # addr2line is missing, not executable, or exceeded the timeout: keep the raw address.
+            symbol = fallback
+        self.symbol_cache[cache_key] = symbol
+        return symbol
+
+    def _find_so_path(self, so_name):
+        """Locate ``so_name`` on disk; returns the path or None, and caches either outcome."""
+        if so_name in self.so_path_cache:
+            return self.so_path_cache[so_name]
+        if os.path.isabs(so_name) and os.path.exists(so_name):
+            self.so_path_cache[so_name] = so_name
+            return so_name
+        base_name = os.path.basename(so_name)
+        search_paths = [
+            "/usr/lib", "/usr/local/lib", "/lib",
+            *os.getenv("LD_LIBRARY_PATH", "").split(":"),
+            *os.getenv("PATH", "").split(":"),
+            # lib64 directories are searched last, so every lookup that succeeded before is unchanged.
+            "/usr/lib64", "/usr/local/lib64", "/lib64",
+        ]
+        # Empty entries produced by split(":") are dropped by the isdir filter.
+        for path in filter(os.path.isdir, search_paths):
+            test_path = os.path.join(path, base_name)
+            if os.path.exists(test_path):
+                self.so_path_cache[so_name] = test_path
+                return test_path
+            # Otherwise try the unversioned name and the suffixes .1 to .5 in this directory.
+            if base_name.startswith("lib") and ".so" in base_name:
+                lib_prefix = base_name.split(".so")[0]
+                for ext in ["", ".1", ".2", ".3", ".4", ".5"]:
+                    test_path = os.path.join(path, f"{lib_prefix}.so{ext}")
+                    if os.path.exists(test_path):
+                        self.so_path_cache[so_name] = test_path
+                        return test_path
+        self.so_path_cache[so_name] = None
+        return None
+
+    def _save_json(self, path, trace_events):
+        """Write the events, sorted by ``ts``, as trace JSON to ``path``.
+
+        When ``path`` is a directory, the file name is derived from ``sys.argv[1]``.
+        """
+        if os.path.isdir(path):
+            stem, _ext = os.path.splitext(os.path.basename(sys.argv[1]))
+            path = os.path.join(path, f"{stem}_fixed_flamegraph.json")
+
+        with open(path, "w") as out_file:
+            ordered_events = sorted(trace_events, key=lambda event: event["ts"])
+            metadata = {"format": "FixedFlameGraph", "stage_order": list(self.stage_names.values())}
+            document = {"traceEvents": ordered_events, "displayTimeUnit": "ns", "metadata": metadata}
+            json.dump(document, out_file, indent=2)
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:  # exactly two arguments: the input .pb file and the output path
+        print(f"Usage: python {sys.argv[0]} input.pb output.json")
+        sys.exit(1)
+    FixedFlameGraphConverter().convert(sys.argv[1], sys.argv[2])
\ No newline at end of file
diff --git a/systrace/convert/convert_mspti_timeline.py b/systrace/convert/convert_mspti_timeline.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8bbcf8f20972ee44571a71047460d0f288811d5
--- /dev/null
+++ b/systrace/convert/convert_mspti_timeline.py
@@ -0,0 +1,129 @@
+# coding=utf-8
+"""
+Copyright (c) Huawei Technologies Co., Ltd. 2020-2028. All rights reserved.
+Description:
+FileName: convert_mspti_timeline.py
+Author: h00568282/huangbin
+Create Date: 2025/3/26 11:23
+Notes:
+
+"""
+import os
+import json
+import pandas as pd
+from convert_json2csv import convert_jsons2csv
+
+__all__ = ['convert_mspti_timeline']
+
+MODE = {
+    0: "Host",
+    1: "Device"
+}
+OP_COLORS = {
+    'HcclAllreduce': "good",
+    'HcclAllReduce': "good",
+    'HcclAllGather': "bad",
+    'HcclBroadcast': "yellow",
+    'HcclReduceScatter': "olive",
+    'HcclSend': "good",
+    'HcclReceive': "good",
+    'HcclBatchSendRecv': "thread_state_runnable"
+}
+
+
+def create_args(row):
+    """Build the ``args`` payload of one trace event from a row: its id, comm group and count."""
+    # (output key, source column) pairs; only "id" is renamed.
+    wanted = (("id", "Id"), ("comm_group", "comm_group"), ("count", "count"))
+    payload = {out_key: row[column] for out_key, column in wanted}
+    return payload
+
+
+def split_df(df):
+    """
+    Split the DataFrame by its SourceKind column into host rows (0) and device rows (1).
+    """
+    is_host = df['SourceKind'] == 0
+    is_device = df['SourceKind'] == 1
+    return df[is_host], df[is_device]
+
+
+def process_df(data_df, device_id, id2name_dict: dict):
+    """
+    Collapse the rows of each Id into one complete ("X") trace event and return the events as dicts.
+    The input frame is left unmodified; names are expected as "comm:<op>,<group>,<dtype>,<count>".
+    """
+    if data_df.empty:
+        return []
+    named = data_df.assign(Name=data_df['Id'].map(id2name_dict))  # a copy; the caller's slice stays untouched
+    df = named.groupby('Id').agg({
+        'Timestamp': ['min', 'max'],
+        'Kind': 'first',
+        'SourceKind': 'first',
+        'Name': 'first',
+    }).reset_index()
+    df.columns = ['Id', 'start', 'end', 'Kind', 'SourceKind', 'Name']
+    # Literal replace, at most four fields, and always four columns even when a name has fewer fields.
+    parts = df['Name'].str.replace('comm:', '', regex=False).str.split(',', n=3, expand=True)
+    df[['comm_op', 'comm_group', 'data_type', 'count']] = parts.reindex(columns=range(4))
+    df = df.drop(columns=['Name'])
+    df['cat'], df['ph'], df['pid'] = "hccl", "X", f"rank_{device_id}"
+    df['name'] = df['comm_op']
+    df['cname'] = df['comm_op'].map(OP_COLORS)
+    df['end'] = df['end'] / 1000.
+    df['start'] = df['start'] / 1000.
+    df['dur'] = df['end'] - df['start']
+    df['tid'] = df["SourceKind"].map(MODE)
+    df['args'] = df.apply(create_args, axis=1)
+    return df[['cat', 'name', 'ph', 'pid', 'tid', 'start', 'dur', 'cname', 'args']].rename(
+        columns={'start': 'ts'}).to_dict(orient='records')
+
+
+def process_files(root_path, debug: bool = False):
+    """
+    Convert every ``*.csv`` file in ``root_path`` whose name lacks "device" into trace events.
+    """
+    csv_files = [file for file in os.listdir(root_path) if file.endswith(".csv") and "device" not in file]
+    all_ranks = []
+    for csv_file in csv_files:
+        print(f"start file: {csv_file}")
+        df = pd.read_csv(os.path.join(root_path, csv_file))
+        if debug:
+            df = df.head(12)  # debug mode only looks at the first 12 rows
+        id2name_dict = df[df['Name'].notna()].set_index('Id')['Name'].to_dict()
+        df_host, df_device = split_df(df)
+        device_ids = df_device['msptiObjecId_Ds_DeviceId'].unique()
+        if len(device_ids) == 0:
+            # Without device rows the rank is unknown; skip the file instead of raising IndexError.
+            print(f"skip file without device records: {csv_file}")
+            continue
+        device_id = device_ids[0]
+        all_ranks.extend(process_df(df_host, device_id, id2name_dict))
+        all_ranks.extend(process_df(df_device, device_id, id2name_dict))
+    return all_ranks
+
+
+def save_to_json(all_ranks, files_path):
+    """
+    Write the events to ``mspti_comm_ops_timeline.json`` inside ``files_path`` as indented trace JSON.
+    """
+    target = os.path.join(files_path, 'mspti_comm_ops_timeline.json')
+    document = {"traceEvents": all_ranks, "stackFrames": {}}
+    # The document is serialized completely before the output file is opened.
+    serialized = json.dumps(document, indent=4)
+
+    with open(target, 'w') as handle:
+        handle.write(serialized)
+
+
+def convert_mspti_timeline(data_path: str):
+    """Convert the JSON dumps in ``data_path`` to CSV, build the timeline events and save them there."""
+    convert_jsons2csv(data_path)
+    save_to_json(process_files(data_path), data_path)
+
+
+if __name__ == "__main__":
+    import sys
+    # The data directory may be passed as the first argument; the original hard-coded path is the fallback.
+    files_path = sys.argv[1] if len(sys.argv) > 1 else "D:\\startwork\\AOPS\\09-25年技术规划\\Code\\mspti_test-megatron-0224\\mspti_test-megatron-0224\\data\\log\\all_merge"
+    convert_mspti_timeline(files_path)
\ No newline at end of file
diff --git a/systrace/convert/convert_pytorch_to_timeline.py b/systrace/convert/convert_pytorch_to_timeline.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb42d8b2d80767786166a7cefc17fb22056017bf
--- /dev/null
+++ b/systrace/convert/convert_pytorch_to_timeline.py
@@ -0,0 +1,50 @@
+import json
+import systrace_pb2
+import argparse
+import glob
+
+def process_timeline_file(input_path, trace_data):
+    with open(input_path, "rb") as f:
+        pytorch_data = systrace_pb2.Pytorch()
+        pytorch_data.ParseFromString(f.read())
+    
+    for stage in pytorch_data.pytorch_stages:
+        trace_data["traceEvents"].append({
+            "name": stage.stage_type,
+            "cat": "pytorch",
+            "ph": "X",
+            "pid": pytorch_data.rank,
+            "tid": pytorch_data.rank if "GC" not in stage.stage_type else f"{pytorch_data.rank}:gc",
+            "ts": stage.start_us,
+            "dur": stage.end_us - stage.start_us,
+            "args": {
+                "stage_id": stage.stage_id,
+                "comm": pytorch_data.comm,
+                "stack_frames": list(stage.stack_frames),
+                "gc_collected": stage.gc_debug.collected if stage.HasField("gc_debug") else 0,
+                "gc_uncollectable": stage.gc_debug.uncollectable if stage.HasField("gc_debug") else 0
+            }
+        })
+
+def aggregate_timeline_files(output_path):
+    trace_data = {
+        "traceEvents": [],
+        "displayTimeUnit": "ns",
+        "metadata": {"format": "Pytorch Profiler"}
+    }
+
+    for timeline_file in glob.glob("*timeline"):
+        print(f"Processing {timeline_file}")
+        process_timeline_file(timeline_file, trace_data)
+    
+    trace_data["traceEvents"].sort(key=lambda x: x["args"]["stage_id"])
+    
+    with open(output_path, "w") as f:
+        json.dump(trace_data, f, indent=None, separators=(',', ':'))
+    print(f"Aggregated {len(trace_data['traceEvents'])} events to {output_path}")
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Aggregate all *.timeline files into a single JSON')
+    parser.add_argument('--output', required=True, help='Output JSON file path')
+    args = parser.parse_args()
+    aggregate_timeline_files(args.output)
\ No newline at end of file
diff --git a/systrace/hack/format.sh b/systrace/hack/format.sh
new file mode 100644
index 0000000000000000000000000000000000000000..9a05c76ccf81b96a6978d6beb3f22b7853bee2eb
--- /dev/null
+++ b/systrace/hack/format.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+function install_clang_format() {
+    # Make sure clang-format is on PATH: nothing to do when it already is,
+    # otherwise install it through apt or yum (needs sudo). The whole script
+    # exits with status 1 when neither package manager is available.
+    command -v clang-format &> /dev/null && { echo "clang-format is already installed."; return; }
+    echo "Installing clang-format..."
+    if command -v apt &> /dev/null; then
+        sudo apt install -y clang-format
+    elif command -v yum &> /dev/null; then
+        sudo yum install -y clang-format
+    else
+        echo "Error: Cannot install clang-format (unsupported package manager)."
+        exit 1
+    fi
+}
+
+function setup_clang_format() {
+    # Write a default .clang-format (LLVM base, 4-space indent, Allman braces)
+    # into the current directory. An existing file is left untouched and only
+    # reported.
+    local clang_format_file=".clang-format"
+    [ -f "$clang_format_file" ] && { echo ".clang-format already exists. Modify it manually if needed."; return; }
+    echo "Creating .clang-format with 4-space indentation and Allman braces..."
+    cat > "$clang_format_file" <<EOF
+BasedOnStyle: LLVM
+IndentWidth: 4
+BreakBeforeBraces: Allman
+UseTab: Never
+TabWidth: 4
+EOF
+}
+
+function format_code() {
+    # In-place format of all C/C++ files below "." except *pb-c* paths; -exec {} + copes with spaces in names and with no matches.
+    echo "Formatting C/C++ files..."
+    find . -type f \( -name "*.c" -o -name "*.h" -o -name "*.cpp" -o -name "*.hpp" -o -name "*.cc" -o -name "*.cxx" \) \
+        ! -path "*pb-c*" -exec clang-format -i -style=file {} +
+    echo "Formatting complete!"
+}
+
+install_clang_format
+setup_clang_format
+format_code
\ No newline at end of file
diff --git a/systrace/include/common/constant.h b/systrace/include/common/constant.h
new file mode 100644
index 0000000000000000000000000000000000000000..1bee6fa55449f71c915c5825eae64770aaa013b8
--- /dev/null
+++ b/systrace/include/common/constant.h
@@ -0,0 +1,19 @@
+#pragma once
+#include "shared_constants.h"
+#include <algorithm>
+
+namespace systrace
+{
+namespace constant
+{
+
+struct TorchTraceConstant // NOTE(review): presumably defaults for torch tracing - confirm with the users of this struct
+{
+  public: // redundant: struct members are public by default
+    static constexpr int DEFAULT_TRACE_COUNT = 1000;
+    static constexpr std::string_view DEFAULT_TRACE_DUMP_PATH = // NOTE(review): <string_view> is not included by this header
+        SYS_TRACE_ROOT_DIR "timeline"; // adjacent string literals concatenate: root-dir macro + "timeline"
+};
+
+} // namespace constant
+} // namespace systrace
\ No newline at end of file
diff --git a/systrace/include/common/logging.cc b/systrace/include/common/logging.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f7dec9774ee15a0234ee7893ef06571323086956
--- /dev/null
+++ b/systrace/include/common/logging.cc
@@ -0,0 +1,6 @@
+#include "logging.h"
+
+namespace systrace
+{
+void setLoggingPath() { return; } // currently a no-op placeholder
+} // namespace systrace
\ No newline at end of file
diff --git a/systrace/include/common/logging.h b/systrace/include/common/logging.h
new file mode 100644
index 0000000000000000000000000000000000000000..dc7ef35acc43f2959a3ffd6736f2d1cf094c7103
--- /dev/null
+++ b/systrace/include/common/logging.h
@@ -0,0 +1,28 @@
+#pragma once
+
+enum LogLevel // severity levels accepted by LOG() / STLOG()
+{
+    INFO,
+    WARNING,
+    ERROR,
+    FATAL // only selects the "[FATAL] " tag; LOG() itself does not abort
+};
+
+// LOG(level) is one std::cerr expression that already carries the level tag,
+// so `if (c) LOG(INFO) << x;` and `if (c) LOG(..) << x; else ...` behave as
+// written. It must not expand to an if/else-if statement chain: that would
+// guard only the tag and bind a following `else` to the macro's own `if`.
+#define LOG(level)                                                             \
+    (std::cerr << ((level) == INFO      ? "[INFO] "                            \
+                   : (level) == WARNING ? "[WARNING] "                         \
+                   : (level) == ERROR   ? "[ERROR] "                           \
+                   : (level) == FATAL   ? "[FATAL] "                           \
+                                        : ""))
+
+#define STLOG(level)                                                           \
+    LOG(level) << ::systrace::util::config::GlobalConfig::Instance().rank_str // LOG plus the rank prefix; GlobalConfig comes from util.h, which this header does not include
+
+namespace systrace
+{
+void setLoggingPath();
+}
\ No newline at end of file
diff --git a/systrace/include/common/macro.h b/systrace/include/common/macro.h
new file mode 100644
index 0000000000000000000000000000000000000000..56d60ab48fd16dd4ef02c9339c3509a9c2df8bb4
--- /dev/null
+++ b/systrace/include/common/macro.h
@@ -0,0 +1,17 @@
+#pragma once
+#define EXPOSE_API __attribute__((visibility("default")))
+
+#define SETUP_SYMBOL_FOR_LOAD_LIBRARY(handle, symbol, func_ptr, func_type,     \
+                                      msg)                                     \
+    do                                                                         \
+    {                                                                          \
+        dlerror(); /* clear any stale error so the check below is about dlsym */ \
+        func_ptr = (func_type)dlsym(handle, symbol);                           \
+        if (const char *dlsym_error = dlerror())                               \
+        {                                                                      \
+            STLOG(WARNING) << "Load fn `" << symbol << "` error in " << msg    \
+                           << dlsym_error;                                     \
+            is_usable_ = false;                                                \
+            return;                                                            \
+        }                                                                      \
+    } while (0)
diff --git a/systrace/include/common/shared_constants.h b/systrace/include/common/shared_constants.h
new file mode 100644
index 0000000000000000000000000000000000000000..d4408e4c5467ad41c68169dc3c41092d2662d618
--- /dev/null
+++ b/systrace/include/common/shared_constants.h
@@ -0,0 +1,12 @@
+#ifdef __cplusplus // NOTE(review): this header has no include guard
+extern "C"
+{
+#endif
+
+    extern int global_stage_id; // declaration only; C linkage so C and C++ sources refer to the same object
+    extern int global_stage_type; // declaration only, same linkage as above
+#define SYS_TRACE_ROOT_DIR "/home/sysTrace/" // hard-coded root directory; ends with '/' so a file name can be appended directly
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/systrace/include/common/util.cc b/systrace/include/common/util.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a526d0c4f96eb4f333976f2eee53d90e28512918
--- /dev/null
+++ b/systrace/include/common/util.cc
@@ -0,0 +1,253 @@
+#include "util.h"
+#include "constant.h"
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <filesystem>
+#include <fstream>
+#include <mutex>
+#include <thread>
+#include <unistd.h>
+
+namespace systrace
+{
+namespace util
+{
+
+namespace env
+{
+std::string_view EnvVarRegistry::DEFAULT_VALUE_STRING = "NONE"; // fallback for string variables that were never registered
+int EnvVarRegistry::DEFAULT_VALUE_INT = 0; // fallback for int variables; also returned when std::stoi fails on an int value
+bool EnvVarRegistry::DEFAULT_VALUE_BOOL = false; // fallback for bool variables that were never registered
+} // namespace env
+namespace fs_utils
+{
+
+int CreateDirectoryIfNotExists(const std::string &path)
+{
+    // Ensures `path` is a directory, creating it (and any missing parents)
+    // when nothing exists there yet. Returns 0 on success, 1 after LOG(ERROR).
+    namespace fs = std::filesystem;
+    const fs::path target(path);
+    try
+    {
+        if (!fs::exists(target))
+            fs::create_directories(target);
+        if (fs::is_directory(target))
+        {
+            return 0;
+        }
+        LOG(ERROR) << "Path exists but is not a directory: " << path;
+    }
+    catch (const fs::filesystem_error &e)
+    {
+        LOG(ERROR) << "Failed to create directory " << path << ": " << e.what();
+    }
+    return 1;
+}
+
+std::string GenerateClusterUniqueFilename(const std::string &suffix)
+{
+    // Returns "<hostname>--<rank, zero-padded to 5>" + suffix; "unknown" replaces the hostname when it cannot be read.
+    try
+    {
+        char buf[128] = {0}; // zeroed with the last byte reserved: gethostname() may truncate without a terminator
+        std::ostringstream oss;
+        oss << (gethostname(buf, sizeof(buf) - 1) == 0 ? buf : "unknown") << "--"
+            << std::setw(5) << std::setfill('0') << config::GlobalConfig::Instance().rank << suffix;
+        return oss.str();
+    }
+    catch (const std::exception &e)
+    {
+        LOG(ERROR) << "Filename generation failed: " << e.what();
+        return "error_" + std::to_string(std::time(nullptr)) + suffix;
+    }
+}
+
+} // namespace fs_utils
+
+namespace config
+{
+
+class DeviceManager
+{
+  public:
+    static constexpr uint64_t MAX_DEVICES = 16;
+    static constexpr const char *DEVICE_PATH_PREFIX = "/dev/davinci";
+
+    // Probes DEVICE_PATH_PREFIX + "0" .. "15" and returns, in ascending
+    // order, the indices whose device node exists. When the local rank is 0
+    // every node that is found is also logged.
+    static std::vector<uint64_t> DetectAvailableDevices()
+    {
+        std::vector<uint64_t> found;
+        found.reserve(MAX_DEVICES);
+
+        for (uint64_t idx = 0; idx < MAX_DEVICES; ++idx)
+        {
+            const std::string node = DevicePathFor(idx);
+            if (!std::filesystem::exists(node))
+            {
+                continue;
+            }
+            found.push_back(idx);
+            if (config::GlobalConfig::Instance().local_rank == 0)
+            {
+                LOG(INFO) << "Found device: " << node;
+            }
+        }
+
+        // Indices were appended in ascending order, so this sort changes nothing.
+        std::sort(found.begin(), found.end());
+        return found;
+    }
+
+  private:
+    // Device node path for `index`, i.e. DEVICE_PATH_PREFIX + "<index>".
+    static std::string DevicePathFor(uint64_t index)
+    {
+        return std::string(DEVICE_PATH_PREFIX) + std::to_string(index);
+    }
+};
+
+namespace
+{
+
+GlobalConfig &config = GlobalConfig::Instance();
+
+void LoadEnvironmentVariables()
+{
+    // Populate the global config from RANK / LOCAL_RANK / LOCAL_WORLD_SIZE /
+    // WORLD_SIZE (ints) and ENV_ARGO_WORKFLOW_NAME (string) via the registry.
+    using Registry = env::EnvVarRegistry;
+
+    config.rank = Registry::GetEnvVar<int>("RANK");
+    config.job_name = Registry::GetEnvVar<std::string>("ENV_ARGO_WORKFLOW_NAME");
+    config.local_rank = Registry::GetEnvVar<int>("LOCAL_RANK");
+    config.local_world_size = Registry::GetEnvVar<int>("LOCAL_WORLD_SIZE");
+    config.world_size = Registry::GetEnvVar<int>("WORLD_SIZE");
+
+    // Prefix that STLOG puts in front of rank-tagged log lines.
+    config.rank_str = "[RANK " + std::to_string(config.rank) + "] ";
+}
+
+void ValidateDeviceConfiguration()
+{
+    // config.enable is cleared when no device node was detected or when the
+    // number of detected devices differs from the local world size.
+    config.devices = DeviceManager::DetectAvailableDevices();
+    const auto device_count = config.devices.size();
+    if (device_count == 0)
+    {
+        config.enable = false;
+        LOG(WARNING) << "No devices found, disabling tracing";
+    }
+    else if (config.local_world_size != device_count)
+    {
+        LOG(WARNING) << "Local world size mismatch, disabling hook";
+        config.enable = false;
+    }
+}
+
+} // namespace
+
+void InitializeGlobalConfiguration() // loads the env-derived settings, then checks them against the detected devices
+{
+    LOG(INFO) << "Initializing global configuration";
+
+    try
+    {
+        LoadEnvironmentVariables();
+        ValidateDeviceConfiguration(); // may set config.enable to false
+        LOG(INFO) << "Global configuration initialized successfully";
+    }
+    catch (const std::exception &e)
+    {
+        LOG(ERROR) << "Global config initialization failed: " << e.what();
+        throw; // rethrow after logging so the caller still sees the failure
+    }
+}
+
+} // namespace config
+
+namespace environment
+{
+
+bool IsValidEnvironmentVariableName(const std::string &name)
+{
+    // POSIX-style name: [A-Za-z_][A-Za-z0-9_]*. Every character goes through
+    // unsigned char first, because handing a negative char to isalpha() or
+    // isalnum() is undefined behaviour.
+    const auto is_head = [](unsigned char c)
+    { return isalpha(c) || c == '_'; };
+    const auto is_tail = [](unsigned char c)
+    { return isalnum(c) || c == '_'; };
+
+    if (name.empty() || !is_head(name[0]))
+    {
+        return false;
+    }
+    return std::all_of(name.begin(), name.end(), is_tail);
+}
+
+void RegisterRequiredEnvironmentVariables() // registers the string, int and bool variables below together with their defaults
+{
+    try
+    {
+        if (!IsValidEnvironmentVariableName("ENV_ARGO_WORKFLOW_NAME")) // the name is a literal, so the outcome of this check is fixed
+        {
+            throw std::invalid_argument(
+                "Invalid env var name: ENV_ARGO_WORKFLOW_NAME");
+        }
+        REGISTER_ENVIRONMENT_VARIABLE(
+            "ENV_ARGO_WORKFLOW_NAME",
+            env::EnvVarRegistry::DEFAULT_VALUE_STRING); // string_view default, converted to std::string by convert_to_variant
+
+        if (!IsValidEnvironmentVariableName("SYSTRACE_SYMS_FILE"))
+        {
+            throw std::invalid_argument(
+                "Invalid env var name: SYSTRACE_SYMS_FILE");
+        }
+        REGISTER_ENVIRONMENT_VARIABLE(
+            "SYSTRACE_SYMS_FILE", env::EnvVarRegistry::DEFAULT_VALUE_STRING);
+
+        if (!IsValidEnvironmentVariableName("SYSTRACE_LOGGING_DIR"))
+        {
+            throw std::invalid_argument(
+                "Invalid env var name: SYSTRACE_LOGGING_DIR");
+        }
+        REGISTER_ENVIRONMENT_VARIABLE(
+            "SYSTRACE_LOGGING_DIR", env::EnvVarRegistry::DEFAULT_VALUE_STRING);
+
+        if (!IsValidEnvironmentVariableName("SYSTRACE_HOST_TRACING_FUNC"))
+        {
+            throw std::invalid_argument(
+                "Invalid env var name: SYSTRACE_HOST_TRACING_FUNC");
+        }
+        REGISTER_ENVIRONMENT_VARIABLE(
+            "SYSTRACE_HOST_TRACING_FUNC",
+            env::EnvVarRegistry::DEFAULT_VALUE_STRING);
+
+        REGISTER_ENVIRONMENT_VARIABLE("RANK", 0); // int defaults; these four names are registered without the name check
+        REGISTER_ENVIRONMENT_VARIABLE("LOCAL_RANK", 0);
+        REGISTER_ENVIRONMENT_VARIABLE("LOCAL_WORLD_SIZE", 1);
+        REGISTER_ENVIRONMENT_VARIABLE("WORLD_SIZE", 1);
+        REGISTER_ENVIRONMENT_VARIABLE("SYSTRACE_LOGGING_APPEND", false); // bool default
+    }
+    catch (const std::exception &e)
+    {
+        LOG(ERROR) << "Environment variable registration failed: " << e.what();
+        throw; // rethrow after logging
+    }
+}
+
+} // namespace environment
+
+void InitializeSystemUtilities()
+{
+    environment::RegisterRequiredEnvironmentVariables(); // runs first so the defaults are registered before the config step reads them
+    config::InitializeGlobalConfiguration();
+}
+
+} // namespace util
+} // namespace systrace
\ No newline at end of file
diff --git a/systrace/include/common/util.h b/systrace/include/common/util.h
new file mode 100644
index 0000000000000000000000000000000000000000..67c077af263cb8e86b886f0b03f3f303903429f4
--- /dev/null
+++ b/systrace/include/common/util.h
@@ -0,0 +1,290 @@
+#pragma once
+
+#include "logging.h"
+#include <cstdlib>
+#include <deque>
+#include <filesystem>
+#include <functional>
+#include <iostream>
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <variant>
+#include <vector>
+
+namespace systrace
+{
+namespace util
+{
+namespace config
+{
+
+struct GlobalConfig // process-wide settings, accessed through Instance()
+{
+    uint32_t rank{0}; // filled from RANK in util.cc
+    uint32_t local_rank{0}; // filled from LOCAL_RANK
+    uint32_t world_size{0}; // filled from WORLD_SIZE
+    uint32_t local_world_size{0}; // filled from LOCAL_WORLD_SIZE
+    std::string job_name; // filled from ENV_ARGO_WORKFLOW_NAME
+    bool enable{true}; // cleared by the device validation step in util.cc
+    std::vector<uint64_t> devices; // indices of the detected /dev/davinci<N> nodes
+    std::string rank_str; // "[RANK <rank>] " prefix that STLOG prints
+
+    static GlobalConfig &Instance()
+    {
+        static GlobalConfig instance; // function-local static: constructed on the first call
+        return instance;
+    }
+
+  private:
+    GlobalConfig() = default; // private default constructor; access is intended to go through Instance()
+};
+
+void InitializeGlobalConfiguration();
+
+} // namespace config
+
+namespace fs_utils
+{
+
+std::string GenerateClusterUniqueFilename(const std::string &suffix);
+int CreateDirectoryIfNotExists(const std::string &path);
+
+} // namespace fs_utils
+
+namespace resource
+{
+template <typename T> class TimerPool
+{
+  public:
+    TimerPool() = default;
+    TimerPool(const TimerPool &) = delete;
+    TimerPool &operator=(const TimerPool &) = delete;
+    ~TimerPool() { clear(); }
+
+    // Hands out the oldest pooled object. With nothing to reuse, a new T is
+    // default-constructed when Init is true, otherwise nullptr is returned.
+    template <bool Init = true> T *getObject()
+    {
+        std::lock_guard<std::mutex> guard(mutex_);
+        if (pool_.empty() || pool_.front() == nullptr)
+        {
+            return Init ? new T() : nullptr;
+        }
+        T *reused = pool_.front();
+        pool_.pop_front();
+        return reused;
+    }
+
+    // Appends `obj` to the pool; a non-null `size` gets the new pool size (0 if `obj` is null).
+    void returnObject(T *obj, int *size)
+    {
+        int pooled = 0;
+        if (obj)
+        {
+            std::lock_guard<std::mutex> guard(mutex_);
+            pool_.push_back(obj);
+            pooled = static_cast<int>(pool_.size());
+        }
+        if (size)
+            *size = pooled;
+    }
+
+    // Deletes every pooled object and empties the pool.
+    void clear()
+    {
+        std::lock_guard<std::mutex> guard(mutex_);
+        while (!pool_.empty())
+        {
+            delete pool_.front();
+            pool_.pop_front();
+        }
+    }
+
+  private:
+    std::deque<T *> pool_;
+    std::mutex mutex_;
+};
+
+} // namespace resource
+
+namespace env
+{
+
+class EnvVarRegistry
+{
+  public:
+    using VarType = std::variant<int, bool, std::string>;
+
+    static std::string_view DEFAULT_VALUE_STRING;
+    static int DEFAULT_VALUE_INT;
+    static bool DEFAULT_VALUE_BOOL;
+
+    static void RegisterEnv(const std::string &name, VarType default_value)
+    {
+        auto &registry = GetRegistryManager();
+        LOG(INFO) << "[ENV] Register ENV " << name << " with default "
+                  << VariantToString(default_value) << std::endl;
+        registry[name] = std::move(default_value);
+    }
+
+    // Get an env var value, with optional printing
+    template <typename T> static T GetEnvVar(const std::string &name)
+    {
+        static_assert(is_supported_type<T>(),
+                      "Unsupported type for environment variable");
+
+        auto &registry = GetRegistryManager();
+        bool set = false;
+
+        // Try to get from environment first
+        T result = getEnvInner<T>(name, &set);
+        if (set)
+        {
+            LOG(INFO) << "[ENV] Get " << name << "=" << result
+                      << " from environment" << std::endl;
+            return result;
+        }
+
+        // Try to get from registered defaults
+        if (auto it = registry.find(name); it != registry.end())
+        {
+            if (const T *val = std::get_if<T>(&it->second))
+            {
+                LOG(INFO) << "[ENV] Get " << name << "=" << *val
+                          << " from register default" << std::endl;
+                return *val;
+            }
+            LOG(FATAL) << "[ENV] Wrong data type in `GetEnvVar`" << std::endl;
+        }
+
+        // Fall back to static default
+        result = getDefault<T>();
+        LOG(WARNING) << "[ENV] Get not register env " << name << "=" << result
+                     << " from default" << std::endl;
+        return result;
+    }
+
+    template <typename T>
+    static inline auto convert_to_variant(const T &s)
+        -> std::enable_if_t<std::is_constructible_v<std::string, T>, VarType>
+    {
+        return std::string(s);
+    }
+
+    template <typename T>
+    static inline auto convert_to_variant(const T &val)
+        -> std::enable_if_t<!std::is_constructible_v<std::string, T>, VarType>
+    {
+        return val;
+    }
+
+  private:
+    template <typename T> static constexpr bool is_supported_type()
+    {
+        return std::is_same_v<T, bool> || std::is_same_v<T, int> ||
+               std::is_same_v<T, std::string>;
+    }
+
+    static std::string toLower(const std::string &str)
+    {
+        std::string lower;
+        lower.reserve(str.size());
+        std::transform(str.begin(), str.end(), std::back_inserter(lower),
+                       [](unsigned char c) { return std::tolower(c); });
+        return lower;
+    }
+
+    // Value parser: converts the raw environment string `env` to T.
+    //   int    -> std::stoi, DEFAULT_VALUE_INT if it is not a number
+    //   bool   -> "true"/"1" and "false"/"0" (case-insensitive), otherwise
+    //             "non-zero integer", DEFAULT_VALUE_BOOL if not a number
+    //   string -> returned verbatim
+    template <typename T> static T parseEnvValue(const char *env)
+    {
+        if constexpr (std::is_same_v<T, int>)
+        {
+            try { return std::stoi(env); }
+            catch (...) { return DEFAULT_VALUE_INT; }
+        }
+        else if constexpr (std::is_same_v<T, bool>)
+        {
+            const std::string lower = toLower(env);
+            if (lower == "true" || lower == "1")
+                return true;
+            if (lower == "false" || lower == "0")
+                return false;
+            // std::stoi throws on text such as "yes"; keep that from escaping.
+            try { return std::stoi(env) != 0; }
+            catch (...) { return DEFAULT_VALUE_BOOL; }
+        }
+        else if constexpr (std::is_same_v<T, std::string>)
+        {
+            return env;
+        }
+    }
+
+    // Get value from real environment
+    template <typename T>
+    static T getEnvInner(const std::string &env_name, bool *set)
+    {
+        const char *env = std::getenv(env_name.c_str());
+        if (!env)
+        {
+            *set = false;
+            return {};
+        }
+
+        *set = true;
+        return parseEnvValue<T>(env);
+    }
+
+    // Default values for fallback
+    template <typename T> static T getDefault()
+    {
+        if constexpr (std::is_same_v<T, int>)
+        {
+            return DEFAULT_VALUE_INT;
+        }
+        else if constexpr (std::is_same_v<T, bool>)
+        {
+            return DEFAULT_VALUE_BOOL;
+        }
+        else if constexpr (std::is_same_v<T, std::string>)
+        {
+            return std::string(DEFAULT_VALUE_STRING);
+        }
+    }
+
+    static inline std::unordered_map<std::string, VarType> &GetRegistryManager()
+    {
+        static std::unordered_map<std::string, VarType> registry_manager;
+        return registry_manager;
+    }
+
+    static std::string VariantToString(const VarType &var)
+    {
+        return std::visit(
+            [](const auto &value)
+            {
+                std::stringstream ss;
+                ss << value;
+                return ss.str();
+            },
+            var);
+    }
+};
+
+#define REGISTER_ENVIRONMENT_VARIABLE(name, value)                             \
+    ::systrace::util::env::EnvVarRegistry::RegisterEnv(                        \
+        name,                                                                  \
+        ::systrace::util::env::EnvVarRegistry::convert_to_variant(value))
+
+void REGISTER_ENV(); // NOTE(review): declared here but not defined in util.cc - confirm it is still needed
+
+} // namespace env
+void InitializeSystemUtilities();
+} // namespace util
+} // namespace systrace
\ No newline at end of file
diff --git a/systrace/protos/CMakeLists.txt b/systrace/protos/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..56eb91f320f796bbab740bb67c1c6aeb64f84647
--- /dev/null
+++ b/systrace/protos/CMakeLists.txt
@@ -0,0 +1,17 @@
+project(general_pb2)
+
+set(PB_FILES systrace.pb.cc systrace.pb-c.c)
+
+add_library(${PROJECT_NAME} STATIC ${PB_FILES})
+
+# Target-scoped include paths: private to this library, not leaked to the directory.
+target_include_directories(${PROJECT_NAME} PRIVATE
+    ${PROJECT_SOURCE_DIR}
+    ${GOOGLE_PROTOBUF_DIR}/include
+)
+
+# NOTE(review): runs after add_library(), so it does not apply to ${PROJECT_NAME}.
+link_directories(${GOOGLE_PROTOBUF_DIR}/lib/)
+
+target_link_libraries(${PROJECT_NAME} PUBLIC protobuf protobuf-c)
+
diff --git a/systrace/protos/systrace.proto b/systrace/protos/systrace.proto
new file mode 100644
index 0000000000000000000000000000000000000000..aa0e8f71b85f36be112326aff86ada54bbd19bb4
--- /dev/null
+++ b/systrace/protos/systrace.proto
@@ -0,0 +1,62 @@
+syntax = "proto3";
+
+message StackFrame {
+    uint64 address = 1;
+    string so_name = 2;
+}
+
+message MemAllocEntry {
+  uint64 alloc_ptr = 1;
+  uint32 stage_id = 2;
+  StageType stage_type = 3;
+  uint64 mem_size = 4;
+  repeated StackFrame stack_frames = 5;
+}
+
+message MemFreeEntry {
+  uint64 alloc_ptr = 1;
+  uint32 stage_id = 2;
+  StageType stage_type = 3;
+}
+
+message ProcMem {
+  uint32 pid = 1;
+  repeated MemAllocEntry mem_alloc_stacks = 2;
+  repeated MemFreeEntry mem_free_stacks = 3;
+}
+
+enum StageType {
+  STAGE_UNKNOWN = 0;
+  STAGE_DATALOADER = 1;
+  STAGE_FORWARD = 2;
+  STAGE_BACKWARD = 3;
+  STAGE_SYNCHRONIZATION = 4;
+  STAGE_GC = 5;
+}
+
+message GcDebugData {
+  uint32 collected = 1;
+  uint32 uncollectable = 2;
+}
+
+message PytorchStage {
+  uint32 stage_id = 1;
+  string stage_type = 2;
+  uint64 start_us = 3;
+  uint64 end_us = 4;
+  repeated string stack_frames = 5;
+  oneof debug_data {
+    GcDebugData gc_debug = 6;
+  }
+}
+
+message Pytorch {
+  repeated PytorchStage pytorch_stages = 1;
+  uint32 rank = 2;
+  uint32 step_id = 3;  
+  string comm = 4;  // task name
+}
+
+message Mem {
+  repeated ProcMem proc_mem = 1;
+}
\ No newline at end of file
diff --git a/systrace/src/ascend/hook.cc b/systrace/src/ascend/hook.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c1681285b5a96d85dcc0b3a2191f8611e7a66224
--- /dev/null
+++ b/systrace/src/ascend/hook.cc
@@ -0,0 +1,74 @@
+#include "hook.h"
+#include "../src/trace/systrace_manager.h"
+#include <cstdlib>
+#include <dlfcn.h>
+#include <iostream>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+    static void *load_symbol(const char *func_name)
+    {
+        // Resolve `func_name` from libascendcl.so, opening the library lazily
+        // on first use and caching the handle in g_hal_lib. Returns nullptr
+        // (after printing the dlerror() text to stderr) on any failure.
+        if (g_hal_lib == nullptr)
+        {
+            g_hal_lib = dlopen("libascendcl.so", RTLD_LAZY);
+        }
+        if (g_hal_lib == nullptr)
+        {
+            fprintf(stderr, "[Hook] Failed to dlopen libascendcl.so: %s\n",
+                    dlerror());
+            return nullptr;
+        }
+
+        void *resolved = dlsym(g_hal_lib, func_name);
+        if (resolved != nullptr)
+        {
+            std::cout << "[Hook] Successfully hooked " << func_name
+                      << std::endl;
+            return resolved;
+        }
+        fprintf(stderr, "[Hook] Failed to dlsym %s: %s\n", func_name, dlerror());
+        return nullptr;
+    }
+
+#define HOOKED_FUNCTION(func_ptr, func_name, ...)                              \
+    if (!func_ptr) /* resolve the real symbol on first use */                  \
+    {                                                                          \
+        func_ptr = (decltype(func_ptr))load_symbol(func_name);                 \
+        if (!func_ptr)                                                         \
+            return -1; /* real function could not be resolved */               \
+    }                                                                          \
+    ::systrace::SysTrace::getInstance(); /* runs on every hooked call */       \
+    return func_ptr(__VA_ARGS__);
+
+    EXPOSE_API aclError aclInit(const char *configPath) // exported wrapper: forwards to the aclInit resolved from libascendcl.so
+    {
+        HOOKED_FUNCTION(orig_aclInit, "aclInit", configPath); // returns -1 when the real symbol cannot be resolved
+    }
+
+    EXPOSE_API aclError aclrtMapMem(void *virPtr, size_t size, size_t offset, // exported wrapper: forwards all arguments unchanged
+                                    aclrtDrvMemHandle handle, uint64_t flags)
+    {
+        HOOKED_FUNCTION(orig_aclrtMapMem, "aclrtMapMem", virPtr, size, offset, // returns -1 when the real symbol cannot be resolved
+                        handle, flags);
+    }
+
+    EXPOSE_API aclError aclrtLaunchKernel(aclrtFuncHandle func, int workDim, // exported wrapper: forwards all arguments unchanged
+                                          void **workGroup,
+                                          size_t *localWorkSize,
+                                          aclrtStream stream, void *event,
+                                          void *config)
+    {
+        HOOKED_FUNCTION(orig_aclrtLaunchKernel, "aclrtLaunchKernel", func, // returns -1 when the real symbol cannot be resolved
+                        workDim, workGroup, localWorkSize, stream, event,
+                        config);
+    }
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/systrace/src/ascend/hook.h b/systrace/src/ascend/hook.h
new file mode 100644
index 0000000000000000000000000000000000000000..adbe0a03e02996f1437024ba9ea252a854659615
--- /dev/null
+++ b/systrace/src/ascend/hook.h
@@ -0,0 +1,41 @@
+#pragma once
+#include "../../include/common/macro.h"
+#include <dlfcn.h>
+#include <functional>
+#include <string>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+    typedef int aclError; // local stand-ins for the ACL types, so no ACL header is needed here
+    typedef void *aclrtStream;
+    typedef void *aclrtFuncHandle;
+    typedef void *aclrtDrvMemHandle;
+
+    typedef aclError (*aclInitFn)(const char *); // pointer types for the real functions looked up with dlsym
+    typedef aclError (*aclrtMapMemFn)(void *, size_t, size_t, aclrtDrvMemHandle,
+                                      uint64_t);
+    typedef aclError (*aclrtLaunchKernelFn)(aclrtFuncHandle, int, void **,
+                                            size_t *, aclrtStream, void *,
+                                            void *);
+
+    extern void *ascend_hal_handle; // NOTE(review): no definition is visible in hook.h or hook.cc
+    extern aclInitFn orig_aclInit;
+    extern aclrtMapMemFn orig_aclrtMapMem;
+    extern aclrtLaunchKernelFn orig_aclrtLaunchKernel;
+
+    aclError aclInit(const char *configPath); // hook entry points, defined in hook.cc
+    aclError aclrtMapMem(void *virPtr, size_t size, size_t offset,
+                         aclrtDrvMemHandle handle, uint64_t flags);
+    aclError aclrtLaunchKernel(aclrtFuncHandle func, int workDim,
+                               void **workGroup, size_t *localWorkSize,
+                               aclrtStream stream, void *event, void *config);
+
+    static void *g_hal_lib = nullptr; // internal linkage: every translation unit including this header gets its own copy
+    aclInitFn orig_aclInit = nullptr; // NOTE(review): definition in a header - a second including translation unit would define it twice
+    aclrtMapMemFn orig_aclrtMapMem = nullptr; // NOTE(review): same concern as orig_aclInit
+    aclrtLaunchKernelFn orig_aclrtLaunchKernel = nullptr; // NOTE(review): same concern as orig_aclInit
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/systrace/src/cann/cann_hook.c b/systrace/src/cann/cann_hook.c
new file mode 100644
index 0000000000000000000000000000000000000000..a1bd27d6f7756c9754df4e24e1f6ba384be2cd21
--- /dev/null
+++ b/systrace/src/cann/cann_hook.c
@@ -0,0 +1,532 @@
+#define _GNU_SOURCE
+#include "../../include/common/shared_constants.h"
+#include "../../protos/systrace.pb-c.h"
+#include <dlfcn.h>
+#include <errno.h>
+#include <google/protobuf-c/protobuf-c.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <unistd.h>
+#if defined(__aarch64__)
+#include "../../thirdparty/aarch64/libunwind/libunwind.h"
+#elif defined(__x86_64__)
+#include "../../thirdparty/x86_64/libunwind/libunwind.h"
+#else
+#error "Unsupported architecture - only aarch64 and x86_64 are supported"
+#endif
+
+// export LD_PRELOAD=/home/MindSpeed-LLM-1.0.RC3/libascend_hal_jack.so
+// cd /home/hbdir/mspti_test-megatron
+// conda activate mspti10
+// python -m torch.distributed.launch --nproc_per_node=8 nqq_train_fsdp.py
+// protoc --c_out=. tmp.proto
+
+// drvError_t halMemAlloc(void **pp, unsigned long long size, unsigned long long
+// flag); drvError_t halMemFree(void *pp); drvError_t
+// halMemCreate(drv_mem_handle_t **handle, size_t size, const struct
+// drv_mem_prop *prop, uint64_t flag); drvError_t halMemRelease
+// (drv_mem_handle_t *handle);
+
+// Flush thresholds used by is_ready_to_write(): buffered records are written
+// once at least LOG_ITEMS_MIN of them are pending, or once LOG_INTERVAL_SEC
+// seconds have passed since the last write.
+#define LOG_INTERVAL_SEC 120
+#define LOG_ITEMS_MIN 1000
+
+// Return type shared by every hooked function; the hooks treat 0 as success.
+typedef int drvError_t;
+
+// Local copy of the allocation-policy enum taken by the aclrtMalloc* family.
+// NOTE(review): values must stay in sync with the vendor header - confirm.
+typedef enum aclrtMemMallocPolicy
+{
+    ACL_MEM_MALLOC_HUGE_FIRST,
+    ACL_MEM_MALLOC_HUGE_ONLY,
+    ACL_MEM_MALLOC_NORMAL_ONLY,
+    ACL_MEM_MALLOC_HUGE_FIRST_P2P,
+    ACL_MEM_MALLOC_HUGE_ONLY_P2P,
+    ACL_MEM_MALLOC_NORMAL_ONLY_P2P,
+    ACL_MEM_TYPE_LOW_BAND_WIDTH = 0x0100,
+    ACL_MEM_TYPE_HIGH_BAND_WIDTH = 0x1000,
+} aclrtMemMallocPolicy;
+// Function-pointer types for the original (non-hooked) implementations.
+// Handle and property parameters are declared as plain void pointers here.
+typedef drvError_t (*halMemAllocFunc_t)(void **pp, unsigned long long size,
+                                        unsigned long long flag);
+typedef drvError_t (*halMemFreeFunc_t)(void *pp);
+typedef drvError_t (*halMemCreateFunc_t)(void **handle, size_t size, void *prop,
+                                         uint64_t flag);
+typedef drvError_t (*halMemReleaseFunc_t)(void *handle);
+
+typedef drvError_t (*aclrtMallocFunc_t)(void **devPtr, size_t size,
+                                        aclrtMemMallocPolicy policy);
+typedef drvError_t (*aclrtMallocCachedFunc_t)(void **devPtr, size_t size,
+                                              aclrtMemMallocPolicy policy);
+typedef drvError_t (*aclrtMallocAlign32Func_t)(void **devPtr, size_t size,
+                                               aclrtMemMallocPolicy policy);
+typedef drvError_t (*aclrtFreeFunc_t)(void *devPtr);
+
+// Original entry points, filled in by init_mem_trace(); NULL until resolved.
+static halMemAllocFunc_t orig_halMemAlloc = NULL;
+static halMemFreeFunc_t orig_halMemFree = NULL;
+static halMemCreateFunc_t orig_halMemCreate = NULL;
+static halMemReleaseFunc_t orig_halMemRelease = NULL;
+static aclrtMallocFunc_t orig_aclrtMalloc = NULL;
+static aclrtMallocCachedFunc_t orig_aclrtMallocCached = NULL;
+static aclrtMallocAlign32Func_t orig_aclrtMallocAlign32 = NULL;
+static aclrtFreeFunc_t orig_aclrtFree = NULL;
+
+// thread_data_key holds each thread's ThreadData (created once via key_once);
+// file_mutex is taken with trylock around the file write.
+static pthread_key_t thread_data_key;
+static pthread_once_t key_once = PTHREAD_ONCE_INIT;
+static pthread_mutex_t file_mutex = PTHREAD_MUTEX_INITIALIZER;
+// Defined in another translation unit; copied into every recorded entry.
+extern int global_stage_id;
+extern int global_stage_type;
+
+// Per-thread recording state: the protobuf message being accumulated and the
+// time of the last write (or of creation, before the first write).
+typedef struct
+{
+    ProcMem *proc_mem;
+    time_t last_log_time;
+} ThreadData;
+
+// Resolve `symbol_name` in the library handle `lib`.
+// Returns the symbol address, or NULL after printing the dlerror() text to
+// stderr when the lookup fails.
+static void *load_symbol(void *lib, const char *symbol_name)
+{
+    void *resolved = dlsym(lib, symbol_name);
+    if (resolved != NULL)
+    {
+        return resolved;
+    }
+
+    fprintf(stderr, "Failed to find symbol %s: %s\n", symbol_name,
+            dlerror());
+    return NULL;
+}
+
+// Release every allocation/free record held by `proc_mem` and reset its
+// arrays and counters. The ProcMem object itself is not freed.
+static void free_proc_mem(ProcMem *proc_mem)
+{
+    if (proc_mem == NULL)
+    {
+        return;
+    }
+
+    // Allocation records: each entry owns its frame array, and every frame
+    // owns its so_name string.
+    for (size_t a = 0; a < proc_mem->n_mem_alloc_stacks; ++a)
+    {
+        MemAllocEntry *alloc = proc_mem->mem_alloc_stacks[a];
+        for (size_t f = 0; f < alloc->n_stack_frames; ++f)
+        {
+            StackFrame *frame = alloc->stack_frames[f];
+            free((void *)frame->so_name);
+            free(frame);
+        }
+        free(alloc->stack_frames);
+        free(alloc);
+    }
+    free(proc_mem->mem_alloc_stacks);
+    proc_mem->mem_alloc_stacks = NULL;
+    proc_mem->n_mem_alloc_stacks = 0;
+
+    // Free records: flat entries with no nested allocations.
+    for (size_t r = 0; r < proc_mem->n_mem_free_stacks; ++r)
+    {
+        free(proc_mem->mem_free_stacks[r]);
+    }
+    free(proc_mem->mem_free_stacks);
+    proc_mem->mem_free_stacks = NULL;
+    proc_mem->n_mem_free_stacks = 0;
+}
+
+// Destructor registered with the thread-specific key: drops the thread's
+// records, the ProcMem message and finally the ThreadData wrapper.
+static void free_thread_data(void *data)
+{
+    ThreadData *owner = (ThreadData *)data;
+    if (owner != NULL && owner->proc_mem != NULL)
+    {
+        ProcMem *message = owner->proc_mem;
+        free_proc_mem(message);
+        free(message);
+    }
+    free(owner);
+}
+
+// Process id of the caller, narrowed to the 32-bit type stored in ProcMem.
+static inline uint32_t get_current_pid()
+{
+    const pid_t self = getpid();
+    return (uint32_t)self;
+}
+
+// pthread_once callback: creates the thread-specific key and registers
+// free_thread_data as its destructor. The result of the creation call is
+// not inspected.
+static void make_key()
+{
+    (void)pthread_key_create(&thread_data_key, free_thread_data);
+}
+
+// Return the calling thread's recording state, creating it on first use.
+//
+// The state is stored under thread_data_key, so it is released by
+// free_thread_data when the thread exits.
+// Returns NULL when the state cannot be allocated.
+static ThreadData *get_thread_data()
+{
+    pthread_once(&key_once, make_key);
+
+    ThreadData *td = pthread_getspecific(thread_data_key);
+    if (td)
+    {
+        return td;
+    }
+
+    td = calloc(1, sizeof(ThreadData));
+    if (!td)
+    {
+        return NULL;
+    }
+
+    td->proc_mem = calloc(1, sizeof(ProcMem));
+    if (!td->proc_mem)
+    {
+        // Do not hand out a half-built object.
+        free(td);
+        return NULL;
+    }
+
+    proc_mem__init(td->proc_mem);
+    td->proc_mem->pid = get_current_pid();
+    // Start the flush interval from the moment the thread first records.
+    td->last_log_time = time(NULL);
+    pthread_setspecific(thread_data_key, td);
+
+    return td;
+}
+
+// Map an instruction address to the file name (without directory) of the
+// object that contains it. Returns "unknown" when dladdr cannot match the
+// address.
+static const char *get_so_name(uint64_t ip)
+{
+    Dl_info info;
+    if (!dladdr((void *)ip, &info))
+    {
+        return "unknown";
+    }
+
+    // Keep only the component after the last '/', if there is one.
+    const char *last_slash = strrchr(info.dli_fname, '/');
+    if (last_slash == NULL)
+    {
+        return info.dli_fname;
+    }
+    return last_slash + 1;
+}
+
+// Build the trace file name for `pid` at time `current` into `buf`.
+//
+// The name is "mem_trace_<YYYYMMDD>_<HH>_<pid>_rank<RANK>.pb", where RANK
+// comes from the environment (0 when unset). The file is placed under
+// SYS_TRACE_ROOT_DIR "cann", which is created if missing; when the directory
+// cannot be created the bare file name (current working directory) is used.
+static void get_log_filename(time_t current, uint32_t pid, char *buf,
+                             size_t buf_size)
+{
+    const char *rank_str = getenv("RANK");
+    int rank = rank_str ? atoi(rank_str) : 0;
+
+    // localtime() returns a pointer to shared static storage and may return
+    // NULL; use the re-entrant variant with a local buffer instead.
+    struct tm tm_buf;
+    if (localtime_r(&current, &tm_buf) == NULL)
+    {
+        memset(&tm_buf, 0, sizeof(tm_buf));
+    }
+
+    const char *dir_path = SYS_TRACE_ROOT_DIR "cann";
+    const char *prefix = dir_path;
+    const char *separator = "/";
+    if (access(dir_path, F_OK) != 0)
+    {
+        if (mkdir(dir_path, 0755) != 0 && errno != EEXIST)
+        {
+            perror("Failed to create directory");
+            // Fall back to a file in the current working directory.
+            prefix = "";
+            separator = "";
+        }
+    }
+
+    snprintf(buf, buf_size, "%s%smem_trace_%04d%02d%02d_%02d_%u_rank%d.pb",
+             prefix, separator, tm_buf.tm_year + 1900, tm_buf.tm_mon + 1,
+             tm_buf.tm_mday, tm_buf.tm_hour, pid, rank);
+}
+
+// Decide whether the thread's buffered records should be written now.
+//
+// Returns 0 when nothing is buffered (leaving *current untouched). Otherwise
+// stores the current time in *current and returns 1 if at least
+// LOG_ITEMS_MIN records are pending or LOG_INTERVAL_SEC seconds have passed
+// since td->last_log_time, and 0 if neither holds.
+static char is_ready_to_write(ThreadData *td, time_t *current)
+{
+    ProcMem *message = td->proc_mem;
+    if (message == NULL)
+    {
+        return 0;
+    }
+
+    if (message->n_mem_alloc_stacks + message->n_mem_free_stacks == 0)
+    {
+        return 0;
+    }
+
+    *current = time(NULL);
+
+    if (message->n_mem_alloc_stacks + message->n_mem_free_stacks >=
+        LOG_ITEMS_MIN)
+    {
+        return 1;
+    }
+
+    return (*current - td->last_log_time < LOG_INTERVAL_SEC) ? 0 : 1;
+}
+
+// Serialize the calling thread's buffered records and append them to the
+// trace file, then clear the buffer.
+//
+// Nothing happens when the thread has no state, when is_ready_to_write()
+// says it is too early, or when another thread currently holds file_mutex
+// (the records stay buffered for a later call).
+static void write_protobuf_to_file()
+{
+    time_t current;
+    ThreadData *td = get_thread_data();
+    if (!td)
+    {
+        return;
+    }
+
+    if (!is_ready_to_write(td, &current))
+    {
+        return;
+    }
+
+    // trylock: never stall a hooked allocation behind another writer.
+    if (pthread_mutex_trylock(&file_mutex) != 0)
+    {
+        return;
+    }
+
+    char filename[256];
+    get_log_filename(current, td->proc_mem->pid, filename, sizeof(filename));
+
+    size_t len = proc_mem__get_packed_size(td->proc_mem);
+    uint8_t *buf = malloc(len);
+    int packed = 0;
+    if (buf)
+    {
+        proc_mem__pack(td->proc_mem, buf);
+        packed = 1;
+
+        FILE *fp = fopen(filename, "ab");
+        if (fp)
+        {
+            fwrite(buf, len, 1, fp);
+            fclose(fp);
+        }
+        free(buf);
+    }
+
+    pthread_mutex_unlock(&file_mutex);
+
+    if (!packed)
+    {
+        // Out of memory for the output buffer: keep the records so that a
+        // later call can retry.
+        return;
+    }
+
+    free_proc_mem(td->proc_mem);
+    td->last_log_time = current;
+}
+
+// atexit callback: attempts one more write for the thread running the exit
+// handlers. The usual readiness checks in write_protobuf_to_file still apply.
+static void exit_handler(void)
+{
+    write_protobuf_to_file();
+}
+
+// Load libascendcl.so and resolve the original implementations of every
+// hooked function, then register exit_handler with atexit.
+//
+// Returns 0 on success, -1 when the library cannot be opened or any of the
+// symbols is missing (pointers that did resolve stay set).
+int init_mem_trace()
+{
+    void *lib =
+        dlopen("/usr/local/Ascend/ascend-toolkit/latest/lib64/libascendcl.so",
+               RTLD_LAZY);
+    if (!lib)
+    {
+        fprintf(stderr, "dlopen failed: %s\n", dlerror());
+        return -1;
+    }
+
+    orig_halMemAlloc = (halMemAllocFunc_t)load_symbol(lib, "halMemAlloc");
+    orig_halMemFree = (halMemFreeFunc_t)load_symbol(lib, "halMemFree");
+    orig_halMemCreate = (halMemCreateFunc_t)load_symbol(lib, "halMemCreate");
+    orig_halMemRelease = (halMemReleaseFunc_t)load_symbol(lib, "halMemRelease");
+    orig_aclrtMalloc = (aclrtMallocFunc_t)load_symbol(lib, "aclrtMalloc");
+    orig_aclrtMallocCached =
+        (aclrtMallocCachedFunc_t)load_symbol(lib, "aclrtMallocCached");
+    orig_aclrtMallocAlign32 =
+        (aclrtMallocAlign32Func_t)load_symbol(lib, "aclrtMallocAlign32");
+    orig_aclrtFree = (aclrtFreeFunc_t)load_symbol(lib, "aclrtFree");
+
+    // Every symbol is required. The last operand previously lacked the
+    // negation, which made a fully successful load report failure and skip
+    // the atexit registration below.
+    if (!orig_halMemAlloc || !orig_halMemFree || !orig_aclrtMalloc ||
+        !orig_aclrtFree || !orig_halMemCreate || !orig_halMemRelease ||
+        !orig_aclrtMallocCached || !orig_aclrtMallocAlign32)
+    {
+        return -1;
+    }
+
+    atexit(exit_handler);
+
+    return 0;
+}
+
+// Load base address of the object containing `addr`, or 0 when dladdr
+// cannot match the address.
+unw_word_t get_so_base(unw_word_t addr)
+{
+    Dl_info info;
+    if (dladdr((void *)addr, &info) == 0)
+    {
+        return 0;
+    }
+    return (unw_word_t)info.dli_fbase;
+}
+
+// Walk the caller's stack with libunwind and append up to 32 frames to
+// `entry`.
+//
+// Each frame records the containing object's file name and the instruction
+// address as an offset from that object's load base. The walk stops early
+// when unwinding ends or an allocation fails; frames collected so far are
+// kept and entry->n_stack_frames always matches the number of stored frames.
+static void collect_stack_frames(MemAllocEntry *entry)
+{
+    unw_cursor_t cursor;
+    unw_context_t context;
+    unw_word_t ip;
+    int frame_count = 0;
+    const int max_frames = 32;
+
+    unw_getcontext(&context);
+    unw_init_local(&cursor, &context);
+
+    entry->stack_frames = calloc(max_frames, sizeof(StackFrame *));
+    if (!entry->stack_frames)
+    {
+        return;
+    }
+
+    // Check the cap first so the cursor is not advanced past the last frame
+    // that can be stored.
+    while (frame_count < max_frames && unw_step(&cursor) > 0)
+    {
+        if (unw_get_reg(&cursor, UNW_REG_IP, &ip) < 0)
+        {
+            break;
+        }
+
+        StackFrame *frame = malloc(sizeof(StackFrame));
+        if (!frame)
+        {
+            break;
+        }
+        stack_frame__init(frame);
+        // Store the offset within the object rather than the virtual
+        // address.
+        frame->address = ip - get_so_base(ip);
+        frame->so_name = strdup(get_so_name(ip));
+        if (!frame->so_name)
+        {
+            free(frame);
+            break;
+        }
+
+        entry->stack_frames[frame_count] = frame;
+        entry->n_stack_frames++;
+
+        frame_count++;
+    }
+}
+
+// Record an allocation of `size` bytes at `pp` (with the current stage and
+// call stack) in the calling thread's buffer.
+//
+// The record is silently dropped when memory for it cannot be obtained; the
+// existing buffer is left intact in that case.
+static void add_mem_alloc_entry(void *pp, size_t size)
+{
+    ThreadData *td = get_thread_data();
+    if (!td || !td->proc_mem)
+    {
+        return;
+    }
+
+    MemAllocEntry *entry = malloc(sizeof(MemAllocEntry));
+    if (!entry)
+    {
+        return;
+    }
+    mem_alloc_entry__init(entry);
+    entry->alloc_ptr = (uint64_t)pp;
+    entry->mem_size = size;
+    entry->stage_id = global_stage_id;
+    entry->stage_type = global_stage_type;
+    entry->n_stack_frames = 0;
+    entry->stack_frames = NULL;
+
+    collect_stack_frames(entry);
+
+    // Grow through a temporary so that a failed realloc neither loses the
+    // existing array nor leaves the count ahead of the storage.
+    size_t new_count = td->proc_mem->n_mem_alloc_stacks + 1;
+    MemAllocEntry **grown = realloc(td->proc_mem->mem_alloc_stacks,
+                                    new_count * sizeof(MemAllocEntry *));
+    if (!grown)
+    {
+        for (size_t j = 0; j < entry->n_stack_frames; j++)
+        {
+            free((void *)entry->stack_frames[j]->so_name);
+            free(entry->stack_frames[j]);
+        }
+        free(entry->stack_frames);
+        free(entry);
+        return;
+    }
+
+    grown[new_count - 1] = entry;
+    td->proc_mem->mem_alloc_stacks = grown;
+    td->proc_mem->n_mem_alloc_stacks = new_count;
+}
+
+// Record the release of `pp` (with the current stage) in the calling
+// thread's buffer.
+//
+// The record is silently dropped when memory for it cannot be obtained; the
+// existing buffer is left intact in that case.
+static void add_mem_free_entry(void *pp)
+{
+    ThreadData *td = get_thread_data();
+    if (!td || !td->proc_mem)
+    {
+        return;
+    }
+
+    MemFreeEntry *entry = malloc(sizeof(MemFreeEntry));
+    if (!entry)
+    {
+        return;
+    }
+    mem_free_entry__init(entry);
+    entry->alloc_ptr = (uint64_t)pp;
+    entry->stage_id = global_stage_id;
+    entry->stage_type = global_stage_type;
+
+    // Grow through a temporary so that a failed realloc neither loses the
+    // existing array nor leaves the count ahead of the storage.
+    size_t new_count = td->proc_mem->n_mem_free_stacks + 1;
+    MemFreeEntry **grown = realloc(td->proc_mem->mem_free_stacks,
+                                   new_count * sizeof(MemFreeEntry *));
+    if (!grown)
+    {
+        free(entry);
+        return;
+    }
+
+    grown[new_count - 1] = entry;
+    td->proc_mem->mem_free_stacks = grown;
+    td->proc_mem->n_mem_free_stacks = new_count;
+}
+
+// Hook for halMemAlloc: forwards to the original implementation, records a
+// successful allocation, then gives the writer a chance to flush.
+// Returns the original's result, or -1 when the original cannot be resolved.
+drvError_t halMemAlloc(void **pp, unsigned long long size,
+                       unsigned long long flag)
+{
+    if (!orig_halMemAlloc)
+    {
+        init_mem_trace();
+    }
+    if (!orig_halMemAlloc)
+    {
+        // Resolution failed: report an error instead of calling through a
+        // NULL function pointer.
+        return -1;
+    }
+    int ret = orig_halMemAlloc(pp, size, flag);
+    if (ret == 0 && pp && *pp)
+    {
+        add_mem_alloc_entry(*pp, size);
+    }
+
+    write_protobuf_to_file();
+
+    return ret;
+}
+
+// Hook for halMemFree: forwards to the original implementation, records a
+// successful release, then gives the writer a chance to flush.
+// Returns the original's result, or -1 when the original cannot be resolved.
+drvError_t halMemFree(void *pp)
+{
+    if (!orig_halMemFree)
+    {
+        init_mem_trace();
+    }
+    if (!orig_halMemFree)
+    {
+        // Resolution failed: report an error instead of calling through a
+        // NULL function pointer.
+        return -1;
+    }
+    int ret = orig_halMemFree(pp);
+    if (ret == 0 && pp)
+    {
+        add_mem_free_entry(pp);
+    }
+
+    write_protobuf_to_file();
+
+    return ret;
+}
+
+// Hook for aclrtMalloc: forwards to the original implementation, records a
+// successful allocation, then gives the writer a chance to flush.
+// Returns the original's result, or -1 when the original cannot be resolved.
+drvError_t aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy)
+{
+    if (!orig_aclrtMalloc)
+    {
+        init_mem_trace();
+    }
+    if (!orig_aclrtMalloc)
+    {
+        // Resolution failed: report an error instead of calling through a
+        // NULL function pointer.
+        return -1;
+    }
+    int ret = orig_aclrtMalloc(devPtr, size, policy);
+    if (ret == 0 && devPtr && *devPtr)
+    {
+        add_mem_alloc_entry(*devPtr, size);
+    }
+
+    write_protobuf_to_file();
+
+    return ret;
+}
+
+// Hook for aclrtMallocCached: forwards to the original implementation,
+// records a successful allocation, then gives the writer a chance to flush.
+// Returns the original's result, or -1 when the original cannot be resolved.
+drvError_t aclrtMallocCached(void **devPtr, size_t size,
+                             aclrtMemMallocPolicy policy)
+{
+    if (!orig_aclrtMallocCached)
+    {
+        init_mem_trace();
+    }
+    if (!orig_aclrtMallocCached)
+    {
+        // Resolution failed: report an error instead of calling through a
+        // NULL function pointer.
+        return -1;
+    }
+    int ret = orig_aclrtMallocCached(devPtr, size, policy);
+    if (ret == 0 && devPtr && *devPtr)
+    {
+        add_mem_alloc_entry(*devPtr, size);
+    }
+
+    write_protobuf_to_file();
+
+    return ret;
+}
+
+// Hook for aclrtMallocAlign32: forwards to the original implementation,
+// records a successful allocation, then gives the writer a chance to flush.
+// Returns the original's result, or -1 when the original cannot be resolved.
+drvError_t aclrtMallocAlign32(void **devPtr, size_t size,
+                              aclrtMemMallocPolicy policy)
+{
+    if (!orig_aclrtMallocAlign32)
+    {
+        init_mem_trace();
+    }
+    if (!orig_aclrtMallocAlign32)
+    {
+        // Resolution failed: report an error instead of calling through a
+        // NULL function pointer.
+        return -1;
+    }
+    int ret = orig_aclrtMallocAlign32(devPtr, size, policy);
+    if (ret == 0 && devPtr && *devPtr)
+    {
+        add_mem_alloc_entry(*devPtr, size);
+    }
+
+    write_protobuf_to_file();
+
+    return ret;
+}
+
+// Hook for aclrtFree: forwards to the original implementation, records a
+// successful release, then gives the writer a chance to flush.
+// Returns the original's result, or -1 when the original cannot be resolved.
+drvError_t aclrtFree(void *devPtr)
+{
+    if (!orig_aclrtFree)
+    {
+        init_mem_trace();
+    }
+    if (!orig_aclrtFree)
+    {
+        // Resolution failed: report an error instead of calling through a
+        // NULL function pointer.
+        return -1;
+    }
+    int ret = orig_aclrtFree(devPtr);
+    if (ret == 0 && devPtr)
+    {
+        add_mem_free_entry(devPtr);
+    }
+
+    write_protobuf_to_file();
+
+    return ret;
+}
+
+// Hook for halMemCreate: forwards to the original implementation, records
+// the returned handle as an allocation of `size` bytes, then gives the
+// writer a chance to flush.
+// Returns the original's result, or -1 when the original cannot be resolved.
+drvError_t halMemCreate(void **handle, size_t size, void *prop, uint64_t flag)
+{
+    if (!orig_halMemCreate)
+    {
+        init_mem_trace();
+    }
+    if (!orig_halMemCreate)
+    {
+        // Resolution failed: report an error instead of calling through a
+        // NULL function pointer.
+        return -1;
+    }
+    int ret = orig_halMemCreate(handle, size, prop, flag);
+    if (ret == 0 && handle && *handle)
+    {
+        add_mem_alloc_entry(*handle, size);
+    }
+
+    write_protobuf_to_file();
+
+    return ret;
+}
+
+// Hook for halMemRelease: forwards to the original implementation, records
+// a successful release of the handle, then gives the writer a chance to
+// flush.
+// Returns the original's result, or -1 when the original cannot be resolved.
+drvError_t halMemRelease(void *handle)
+{
+    if (!orig_halMemRelease)
+    {
+        init_mem_trace();
+    }
+    if (!orig_halMemRelease)
+    {
+        // Resolution failed: report an error instead of calling through a
+        // NULL function pointer.
+        return -1;
+    }
+
+    int ret = orig_halMemRelease(handle);
+    if (ret == 0 && handle)
+    {
+        add_mem_free_entry(handle);
+    }
+
+    write_protobuf_to_file();
+
+    return ret;
+}
\ No newline at end of file
diff --git a/systrace/src/mspti/json_file_writer.h b/systrace/src/mspti/json_file_writer.h
new file mode 100644
index 0000000000000000000000000000000000000000..9b19788c2b2ee6f807e2b69a98640a438f977f1a
--- /dev/null
+++ b/systrace/src/mspti/json_file_writer.h
@@ -0,0 +1,189 @@
+#pragma once
+#include "../../include/common/shared_constants.h"
+#include "../../include/common/util.h"
+#include "mspti.h"
+#include <atomic>
+#include <condition_variable>
+#include <fstream>
+#include <iostream>
+#include <json/json.h>
+#include <mutex>
+#include <string.h>
+#include <thread>
+#include <vector>
+
+// Buffers MSPTI marker activities and periodically appends them, formatted
+// as JSON, to a per-rank file.
+//
+// Records are added with bufferMarkerActivity(); a background thread flushes
+// them every five seconds and stopWriter() (also run by the destructor)
+// performs the final flush.
+class MSPTIHcclFileWriter
+{
+  private:
+    std::ofstream file;
+    // Serializes hcclActivityFormatToJson(): protects `root` and `file`.
+    std::mutex buffermtx;
+    // Protects markerActivityBuffer.
+    std::mutex bufferMarkerMtx;
+    // Paired with `cv`; `stop` is set while holding it.
+    std::mutex threadmtx;
+    std::atomic<bool> opened{false};
+    // Created up front so it is valid even when the constructor bails out.
+    std::unique_ptr<std::vector<msptiActivityMarker>> markerActivityBuffer =
+        std::make_unique<std::vector<msptiActivityMarker>>();
+    std::thread writerThread;
+    std::condition_variable cv;
+    std::atomic<bool> stop{false};
+    Json::Value root = Json::Value(Json::ValueType::arrayValue);
+
+  public:
+    // Opens (in append mode) the output file and starts the flush thread.
+    //
+    // The directory comes from METRIC_PATH, defaulting to
+    // SYS_TRACE_ROOT_DIR "mspti/". The RANK environment variable is inserted
+    // before a trailing ".json" in `filename`, or appended otherwise; an
+    // unset or non-numeric RANK is treated as -1.
+    MSPTIHcclFileWriter(const std::string &filename)
+    {
+        const char *path = std::getenv("METRIC_PATH");
+        std::string savePath = path ? path : SYS_TRACE_ROOT_DIR "mspti/";
+        if (systrace::util::fs_utils::CreateDirectoryIfNotExists(savePath))
+        {
+            STLOG(ERROR) << "[MSPTI] Failed to create dump directory";
+            return;
+        }
+        if (!savePath.empty() && savePath.back() != '/')
+        {
+            savePath += "/";
+        }
+        const std::string saveFilename = savePath + filename;
+
+        int rank = -1;
+        if (const char *rankEnv = std::getenv("RANK"))
+        {
+            try
+            {
+                rank = std::stoi(rankEnv);
+            }
+            catch (...)
+            {
+                // std::stoi throws on non-numeric or out-of-range text; a
+                // malformed RANK must not abort the traced process.
+                rank = -1;
+            }
+        }
+
+        std::string filenameWithRank;
+        const size_t nameLen = saveFilename.length();
+        if (nameLen >= 5 && saveFilename.compare(nameLen - 5, 5, ".json") == 0)
+        {
+            filenameWithRank = saveFilename.substr(0, nameLen - 5) + "." +
+                               std::to_string(rank) + ".json";
+        }
+        else
+        {
+            filenameWithRank = saveFilename + "." + std::to_string(rank);
+        }
+        std::cout << "Filename: " << filenameWithRank << std::endl;
+        this->file.open(filenameWithRank, std::ios::out | std::ios::app);
+        this->opened.store(this->file.is_open());
+        this->stop.store(false);
+        this->run();
+    }
+
+    // Stops the flush thread, writes whatever is still buffered and closes
+    // the file. Safe to call more than once.
+    void stopWriter()
+    {
+        {
+            std::unique_lock<std::mutex> lock(this->threadmtx);
+            this->stop.store(true);
+        }
+        this->cv.notify_all();
+        // Always join: destroying a joinable std::thread terminates the
+        // process, and the thread is started even when the file failed to
+        // open.
+        if (this->writerThread.joinable())
+        {
+            this->writerThread.join();
+        }
+        if (this->file.is_open())
+        {
+            this->hcclActivityFormatToJson();
+            this->file.close();
+        }
+        this->opened.store(false);
+    }
+
+    ~MSPTIHcclFileWriter() { this->stopWriter(); }
+
+    // True when `fp` can be opened for reading.
+    bool fileExists(const std::string &fp)
+    {
+        std::ifstream probe(fp.c_str());
+        return probe.good() && probe.is_open();
+    }
+
+    // Copies `*activity` into the pending buffer; a null pointer is ignored.
+    void bufferMarkerActivity(msptiActivityMarker *activity)
+    {
+        if (activity == nullptr)
+        {
+            return;
+        }
+        std::lock_guard<std::mutex> lock(this->bufferMarkerMtx);
+        this->markerActivityBuffer->push_back(*activity);
+    }
+
+    // Starts the background thread that flushes the buffer every five
+    // seconds until `stop` is set.
+    void run()
+    {
+        this->writerThread = std::thread(
+            [this]()
+            {
+                std::unique_lock<std::mutex> lock(this->threadmtx);
+                while (!this->stop.load())
+                {
+                    // The predicate makes a stop request that arrives before
+                    // the wait begins visible immediately instead of after a
+                    // full timeout.
+                    const bool stopping = this->cv.wait_for(
+                        lock, std::chrono::seconds(5),
+                        [this]() { return this->stop.load(); });
+                    if (stopping)
+                    {
+                        break;
+                    }
+                    this->hcclActivityFormatToJson();
+                }
+            });
+    }
+
+    // Converts every pending marker to JSON and appends the resulting array
+    // to the file. Prints "File is not open" and leaves the buffer untouched
+    // when the file is not open.
+    void hcclActivityFormatToJson()
+    {
+        std::lock_guard<std::mutex> lock(this->buffermtx);
+        if (!this->file.is_open())
+        {
+            std::cout << "File is not open" << std::endl;
+            return;
+        }
+
+        // Take the pending records under the same mutex that producers use,
+        // then format them without blocking bufferMarkerActivity().
+        std::vector<msptiActivityMarker> pending;
+        {
+            std::lock_guard<std::mutex> bufferLock(this->bufferMarkerMtx);
+            pending.swap(*this->markerActivityBuffer);
+        }
+
+        for (const auto &activity : pending)
+        {
+            Json::Value markerJson;
+            markerJson["Kind"] = activity.kind;
+            markerJson["SourceKind"] = activity.sourceKind;
+            markerJson["Timestamp"] = activity.timestamp;
+            markerJson["Id"] = activity.id;
+            markerJson["Flag"] = activity.flag;
+            Json::Value msptiObjecId;
+            // NOTE(review): in each branch both "Pt" and "Ds" are filled
+            // from the same objectId member - confirm this mirroring is
+            // intended.
+            if (activity.sourceKind == MSPTI_ACTIVITY_SOURCE_KIND_HOST)
+            {
+                Json::Value pt;
+                pt["ProcessId"] = activity.objectId.pt.processId;
+                pt["ThreadId"] = activity.objectId.pt.threadId;
+                Json::Value ds;
+                ds["DeviceId"] = activity.objectId.pt.processId;
+                ds["StreamId"] = activity.objectId.pt.threadId;
+                msptiObjecId["Pt"] = pt;
+                msptiObjecId["Ds"] = ds;
+            }
+            else if (activity.sourceKind == MSPTI_ACTIVITY_SOURCE_KIND_DEVICE)
+            {
+                Json::Value ds;
+                ds["DeviceId"] = activity.objectId.ds.deviceId;
+                ds["StreamId"] = activity.objectId.ds.streamId;
+                Json::Value pt;
+                pt["ProcessId"] = activity.objectId.ds.deviceId;
+                pt["ThreadId"] = activity.objectId.ds.streamId;
+                msptiObjecId["Pt"] = pt;
+                msptiObjecId["Ds"] = ds;
+            }
+            markerJson["msptiObjectId"] = msptiObjecId;
+            markerJson["Name"] = activity.name;
+            this->root.append(markerJson);
+        }
+
+        if (this->root.size() > 0)
+        {
+            Json::StyledWriter writer;
+            this->file << writer.write(this->root);
+            this->root.clear();
+        }
+    }
+};
\ No newline at end of file
diff --git a/systrace/src/mspti/mspti_tracker.cpp b/systrace/src/mspti/mspti_tracker.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d6f1285f3172f482391ab7479def6392ba6563db
--- /dev/null
+++ b/systrace/src/mspti/mspti_tracker.cpp
@@ -0,0 +1,96 @@
+#include "mspti_tracker.hpp"
+#include <dlfcn.h>
+#include <iostream>
+#include <stdlib.h>
+
+constexpr size_t KB = 1 * 1024;
+constexpr size_t MB = 1 * 1024 * KB;
+constexpr size_t ALIGN_SIZE = 8;
+
+std::mutex MSPTITracker::mtx;
+
+// Round `buffer` up to the next multiple of `align` (a power of two).
+inline uint8_t *align_buffer(uint8_t *buffer, size_t align)
+{
+    const uintptr_t mask = static_cast<uintptr_t>(align - 1);
+    const uintptr_t address = reinterpret_cast<uintptr_t>(buffer);
+    return reinterpret_cast<uint8_t *>((address + mask) & ~mask);
+}
+
+// Creates the JSON writer, then subscribes to MSPTI, registers the buffer
+// callbacks and enables marker activities. The results of the MSPTI calls
+// are not inspected.
+MSPTITracker::MSPTITracker()
+{
+    std::cout << "Logging initialized from preloaded library." << std::endl;
+
+    this->hcclFileWriter.reset(new MSPTIHcclFileWriter("hccl_activity.json"));
+
+    msptiSubscribe(&this->subscriber, nullptr, nullptr);
+    msptiActivityRegisterCallbacks(&MSPTITracker::UserBufferRequest,
+                                   &MSPTITracker::UserBufferComplete);
+    msptiActivityEnable(MSPTI_ACTIVITY_KIND_MARKER);
+}
+
+// Flushes outstanding activity buffers, disables marker collection and
+// stops the file writer, in that order.
+MSPTITracker::~MSPTITracker()
+{
+    msptiActivityFlushAll(1);
+    msptiActivityDisable(MSPTI_ACTIVITY_KIND_MARKER);
+    this->finish();
+}
+
+// Accessor for the single tracker object, constructed on first call.
+MSPTITracker &MSPTITracker::getInstance()
+{
+    static MSPTITracker tracker;
+    return tracker;
+}
+
+// Announces shutdown and stops the file writer when one exists.
+void MSPTITracker::finish()
+{
+    std::cout << "Finishing MSPTI Tracker" << std::endl;
+    if (!hcclFileWriter)
+    {
+        return;
+    }
+    hcclFileWriter->stopWriter();
+}
+
+// Hands a marker record to the file writer; does nothing without a writer.
+void MSPTITracker::readActivityMarker(msptiActivityMarker *activity)
+{
+    if (!hcclFileWriter)
+    {
+        return;
+    }
+    hcclFileWriter->bufferMarkerActivity(activity);
+}
+
+// Buffer-request callback: supplies a 1 MiB buffer for activity records.
+//
+// The pointer handed out is exactly the one returned by malloc, because
+// UserBufferComplete later passes the buffer it receives straight to free().
+// malloc already returns storage aligned for any fundamental type, which
+// covers ALIGN_SIZE. On allocation failure *buffer is null and *size is 0.
+void MSPTITracker::UserBufferRequest(uint8_t **buffer, size_t *size,
+                                     size_t *maxNumRecords)
+{
+    auto &instance = getInstance();
+    std::lock_guard<std::mutex> lock(mtx);
+    constexpr size_t SIZE = MB * 1;
+    instance.requestedCount.fetch_add(1);
+    uint8_t *pBuffer = static_cast<uint8_t *>(malloc(SIZE));
+    *buffer = pBuffer;
+    *size = (pBuffer != nullptr) ? SIZE : 0;
+    *maxNumRecords = 0;
+}
+
+// Buffer-complete callback: walks the records in `buffer`, forwards every
+// marker record to the tracker, and frees the buffer.
+// Iteration ends at the first status other than MSPTI_SUCCESS.
+void MSPTITracker::UserBufferComplete(uint8_t *buffer, size_t size,
+                                      size_t validSize)
+{
+    auto &instance = getInstance();
+    if (validSize > 0)
+    {
+        msptiActivity *pRecord = nullptr;
+        for (;;)
+        {
+            // The lock is taken and released once per record.
+            std::lock_guard<std::mutex> lock(mtx);
+            const msptiResult status =
+                msptiActivityGetNextRecord(buffer, validSize, &pRecord);
+            if (status != MSPTI_SUCCESS)
+            {
+                break;
+            }
+            if (pRecord->kind == MSPTI_ACTIVITY_KIND_MARKER)
+            {
+                instance.readActivityMarker(
+                    reinterpret_cast<msptiActivityMarker *>(pRecord));
+            }
+        }
+    }
+    free(buffer);
+}
\ No newline at end of file
diff --git a/systrace/src/mspti/mspti_tracker.hpp b/systrace/src/mspti/mspti_tracker.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a1c75bcda7e0aead480b0d09e5b43cf42f012775
--- /dev/null
+++ b/systrace/src/mspti/mspti_tracker.hpp
@@ -0,0 +1,33 @@
+#include "json_file_writer.h"
+#include "mspti.h"
+#include <atomic>
+#include <memory>
+#include <mutex>
+
+// Singleton that enables MSPTI marker collection and forwards the marker
+// records it receives to an MSPTIHcclFileWriter.
+class MSPTITracker
+{
+  private:
+    // Shared by the two static buffer callbacks.
+    static std::mutex mtx;
+
+    // Filled in by msptiSubscribe() in the constructor.
+    msptiSubscriberHandle subscriber;
+    std::unique_ptr<MSPTIHcclFileWriter> hcclFileWriter;
+    // Incremented once per UserBufferRequest() call.
+    std::atomic<int> requestedCount{0};
+
+    // Construction and destruction are private; use getInstance().
+    MSPTITracker();
+    ~MSPTITracker();
+
+  public:
+    MSPTITracker(const MSPTITracker &) = delete;
+    MSPTITracker &operator=(const MSPTITracker &) = delete;
+
+    // Returns the process-wide instance, creating it on first use.
+    static MSPTITracker &getInstance();
+
+    msptiSubscriberHandle *getSubscriber() { return &subscriber; }
+    // Stops the file writer, if any.
+    void finish();
+    // Buffers one marker record in the file writer, if any.
+    void readActivityMarker(msptiActivityMarker *activity);
+
+    // Callbacks registered with msptiActivityRegisterCallbacks(): the first
+    // provides a buffer, the second consumes the records and frees it.
+    static void UserBufferRequest(uint8_t **buffer, size_t *size,
+                                  size_t *maxNumRecords);
+    static void UserBufferComplete(uint8_t *buffer, size_t size,
+                                   size_t validSize);
+};
\ No newline at end of file
diff --git a/systrace/src/trace/CMakeLists.txt b/systrace/src/trace/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8eff8f276da7450360da0a83006a0515a6ff81ea
--- /dev/null
+++ b/systrace/src/trace/CMakeLists.txt
@@ -0,0 +1,13 @@
+# Preprocessor definitions for everything configured from this directory:
+# SYSTRACE_EXPORTS and the C++11 libstdc++ ABI selector.
+add_definitions(-DSYSTRACE_EXPORTS -D_GLIBCXX_USE_CXX11_ABI=1)
+
+# Sources of the Python tracing component, relative to this directory.
+set(PYTHON_TRACING_SOURCES
+    python/pytorch_tracing_loader.cc
+    python/pytorch_tracing_manager.cc
+)
+
+# Extra compile flags for the trace sources.
+# NOTE(review): no target is created in this file, and by default source-file
+# properties only apply to targets added in the same directory - confirm
+# these flags actually reach the compiler. -fPIC also duplicates the
+# project-wide CMAKE_POSITION_INDEPENDENT_CODE setting.
+set_source_files_properties(
+    systrace_manager.cc
+    library_loader.cc
+    ${PYTHON_TRACING_SOURCES}
+    PROPERTIES COMPILE_FLAGS "-fPIC -Wall -Wextra"
+)
\ No newline at end of file
diff --git a/systrace/src/trace/library_loader.cc b/systrace/src/trace/library_loader.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a74bb376f9feb7f4922564a3380acc9dfcbc2801
--- /dev/null
+++ b/systrace/src/trace/library_loader.cc
@@ -0,0 +1,46 @@
+#include "library_loader.h"
+#include "../../include/common/logging.h"
+#include <dlfcn.h>
+
+namespace systrace
+{
+
+// Remembers `library_path` and immediately attempts to load it; check
+// IsLibraryLoaded() afterwards to see whether that worked.
+DynamicLibraryLoader::DynamicLibraryLoader(const std::string &library_path)
+    : library_handle_(nullptr),
+      is_usable_(false),
+      library_path_(library_path)
+{
+    this->LoadDynamicLibrary();
+}
+
+// Closes the library handle when one was obtained.
+DynamicLibraryLoader::~DynamicLibraryLoader()
+{
+    if (library_handle_ == nullptr)
+    {
+        return;
+    }
+    dlclose(library_handle_);
+    library_handle_ = nullptr;
+}
+
+// Opens library_path_ with lazy binding and updates is_usable_.
+// Logs a warning and leaves the state unchanged when a handle already
+// exists; logs the dlerror() text when loading fails.
+void DynamicLibraryLoader::LoadDynamicLibrary()
+{
+    if (library_handle_ != nullptr)
+    {
+        STLOG(WARNING) << "Library already loaded: " << library_path_;
+        return;
+    }
+
+    // Discard any stale error text so the message read below belongs to
+    // this dlopen call.
+    dlerror();
+
+    library_handle_ = dlopen(library_path_.c_str(), RTLD_LAZY);
+    if (library_handle_ != nullptr)
+    {
+        is_usable_ = true;
+        return;
+    }
+
+    const char *error_message = dlerror();
+    STLOG(WARNING) << "Failed to load library: "
+                   << (error_message ? error_message : "Unknown error");
+    is_usable_ = false;
+}
+
+} // namespace systrace
\ No newline at end of file
diff --git a/systrace/src/trace/library_loader.h b/systrace/src/trace/library_loader.h
new file mode 100644
index 0000000000000000000000000000000000000000..abae8d75fdac612ee417a8ea631d6cf59bb7da19
--- /dev/null
+++ b/systrace/src/trace/library_loader.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include "../../include/common/util.h"
+#include <map>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace systrace
+{
+
+// Owns a handle to a shared library opened with dlopen() and closes it on
+// destruction. Intended as a base class (virtual destructor, protected
+// state).
+// NOTE(review): the copy constructor is not deleted, so a copied object
+// would close the same handle a second time - confirm instances are never
+// copied.
+class DynamicLibraryLoader
+{
+  protected:
+    // Result of dlopen(); nullptr when the library is not loaded.
+    void *library_handle_;
+    // Set to true only after a successful load.
+    bool is_usable_;
+    const std::string library_path_;
+
+    // Opens library_path_ and updates the two fields above.
+    void LoadDynamicLibrary();
+
+  public:
+    // Stores the path and attempts the load right away.
+    explicit DynamicLibraryLoader(const std::string &library_path);
+    virtual ~DynamicLibraryLoader();
+
+    // True when a handle is held and the load was marked successful.
+    bool IsLibraryLoaded() const
+    {
+        return library_handle_ != nullptr && is_usable_;
+    }
+    // Raw handle for symbol lookups; may be nullptr.
+    void *GetLibraryHandle() const { return library_handle_; }
+};
+
+} // namespace systrace
\ No newline at end of file
diff --git a/systrace/src/trace/python/pytorch_tracing.c b/systrace/src/trace/python/pytorch_tracing.c
new file mode 100644
index 0000000000000000000000000000000000000000..ce3b651425dcdc9bf7155f6ee05d3fc977465d60
--- /dev/null
+++ b/systrace/src/trace/python/pytorch_tracing.c
@@ -0,0 +1,618 @@
+#include "pytorch_tracing.h"
+#if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 11
+#include <pyframe.h>
+#endif
+
+/*
+ * Maps a registered function name to the stage it represents.
+ *
+ * The name must match one of the known identifiers exactly; NULL and any
+ * unrecognised name yield UNKNOWN.
+ */
+Stagetype determine_stage_type(const char *function_name)
+{
+    static const struct
+    {
+        const char *name;
+        Stagetype stage;
+    } known_stages[] = {
+        {"GC", GC},
+        {"torch.utils.data.dataloader@_BaseDataLoaderIter@__next__",
+         DATALOADER},
+        {"torch_npu@npu@synchronize", SYNCHRONIZATION},
+        {"torch_npu.npu@Event@synchronize", SYNCHRONIZATION},
+        {"torch_npu.npu@Event@wait", SYNCHRONIZATION},
+        {"torch_npu.npu@Stream@synchronize", SYNCHRONIZATION},
+        {"torch_npu.npu@Stream@wait_event", SYNCHRONIZATION},
+        {"torch_npu.npu@Stream@wait_stream", SYNCHRONIZATION},
+        {"torch@autograd@backward", BACKWARD},
+        {"torch@autograd@grad", BACKWARD},
+        {"megatron.core.pipeline_parallel@schedules@forward_step", FORWARD},
+        {"megatron.core.pipeline_parallel@schedules@backward_step", BACKWARD},
+    };
+
+    if (function_name != NULL)
+    {
+        const size_t entries = sizeof(known_stages) / sizeof(known_stages[0]);
+        for (size_t i = 0; i < entries; i++)
+        {
+            if (strcmp(function_name, known_stages[i].name) == 0)
+            {
+                return known_stages[i].stage;
+            }
+        }
+    }
+    return UNKNOWN;
+}
+
+/*
+ * Resolves `name` to a Python code-object address and registers it for
+ * tracing under tag `index`.
+ *
+ * name   - "@"-separated function identifier handed to GetFuncAddressByPython.
+ * index  - tag index; also selects the slot in `errors` and in the tracing
+ *          data array.
+ * errors - array of message slots; errors[index] receives the status text.
+ *
+ * Returns 0 on success, the resolver's non-zero code when the lookup fails,
+ * or -1 when memory for the registration record cannot be allocated.
+ */
+static int register_tracing_function(const char *name, int index, char **errors)
+{
+    int64_t code_address;
+    int is_native;
+    int ret =
+        GetFuncAddressByPython(name, errors + index, &code_address, &is_native);
+
+    if (ret)
+    {
+        printf("register function `%s` error\n", name);
+        return ret;
+    }
+
+    /* Allocate everything first so a failure leaves no half-registered entry. */
+    TracingFunction *traced_function =
+        (TracingFunction *)malloc(sizeof(TracingFunction));
+    char *name_copy = strdup(name);
+    if (!traced_function || !name_copy)
+    {
+        free(traced_function);
+        free(name_copy);
+        printf("register function `%s` error\n", name);
+        return -1;
+    }
+    memset(traced_function, 0, sizeof(*traced_function));
+
+    /* int64_t is not necessarily `long`; cast for a portable format. */
+    printf("register function `%s` at address %lld\n", name,
+           (long long)code_address);
+    addTracingData(index, name);
+
+    traced_function->tag_name = index;
+    traced_function->function_name = name_copy;
+    traced_function->py_code_address = code_address;
+    traced_function->is_native = is_native;
+
+    HASH_ADD(hh, pytorch_tracing_func_map, py_code_address, sizeof(int64_t),
+             traced_function);
+
+    return 0;
+}
+
+/*
+ * Installs `profiler` as the profile hook on every thread state of the
+ * current interpreter (up to PY_TRACING_MAX_THREADS).
+ *
+ * The walk starts at the interpreter's thread-list head rather than at the
+ * calling thread, because PyThreadState_Next() only moves forward through the
+ * list and would otherwise skip every thread that precedes the caller.
+ * The calling thread's state is restored before returning.
+ */
+static void set_profiler_for_all_threads()
+{
+    PyThreadState *current = PyThreadState_Get();
+    PyThreadState *thread_array[PY_TRACING_MAX_THREADS];
+    memset(thread_array, 0, sizeof(thread_array));
+
+    /* Snapshot the list first; swapping thread states while iterating the
+     * live list would be fragile. */
+    int thread_count = 0;
+    PyThreadState *tstate = PyInterpreterState_ThreadHead(current->interp);
+    while (tstate != NULL && thread_count < PY_TRACING_MAX_THREADS)
+    {
+        thread_array[thread_count++] = tstate;
+        /* thread_id is an unsigned long. */
+        printf("Set profiler for thread %lu\n", tstate->thread_id);
+        tstate = PyThreadState_Next(tstate);
+    }
+
+    for (int i = 0; i < thread_count; i++)
+    {
+        PyThreadState_Swap(thread_array[i]);
+        PyEval_SetProfile(profiler, NULL);
+    }
+
+    /* Return to the caller's thread state and make sure it is covered even
+     * when the snapshot was truncated by PY_TRACING_MAX_THREADS. */
+    PyThreadState_Swap(current);
+    PyEval_SetProfile(profiler, NULL);
+}
+
+#if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 11
+/*
+ * Records up to MAX_STACK_DEPTH frames, starting at `frame` and walking
+ * towards the callers, into trace_entry->stack_info as "name@file:line"
+ * strings, and stores the number of frames written in stack_depth.
+ *
+ * `frame` itself is borrowed from the caller. Every frame obtained through
+ * PyFrame_GetBack() and every code object obtained through PyFrame_GetCode()
+ * is a strong reference and is released here.
+ */
+static void capture_stack(PyFrameObject *frame, PyTorchTracingData *trace_entry)
+{
+    PyGILState_STATE gstate = PyGILState_Ensure();
+    PyFrameObject *current = frame;
+    int depth = 0;
+    while (current && depth < MAX_STACK_DEPTH)
+    {
+        PyCodeObject *code = PyFrame_GetCode(current);
+        if (!code)
+        {
+            break;
+        }
+
+        const char *name = PyUnicode_AsUTF8(code->co_name);
+        const char *file = PyUnicode_AsUTF8(code->co_filename);
+        int line = PyFrame_GetLineNumber(current);
+
+        snprintf(trace_entry->stack_info[depth],
+                 sizeof(trace_entry->stack_info[depth]), "%s@%s:%d",
+                 name ? name : "unknown", file ? file : "unknown", line);
+
+        PyFrameObject *next_frame = PyFrame_GetBack(current);
+        Py_DECREF(code);
+        /* Only frames we obtained ourselves are owned; the entry frame
+         * belongs to the caller. */
+        if (current != frame)
+        {
+            Py_DECREF(current);
+        }
+        current = next_frame;
+
+        depth++;
+    }
+    /* The loop may stop while still holding an owned frame. */
+    if (current && current != frame)
+    {
+        Py_DECREF(current);
+    }
+    trace_entry->stack_depth = depth;
+    PyGILState_Release(gstate);
+}
+
+/*
+ * Returns the address of the code object executed by `frame`, for use as a
+ * lookup key.
+ *
+ * PyFrame_GetCode() hands back a strong reference; it is released right away
+ * because only the address is needed and the frame keeps its code object
+ * alive. Without the release one reference leaks per profiler event.
+ */
+uint64_t getCodeOfFrame(PyFrameObject *frame)
+{
+    PyCodeObject *code = PyFrame_GetCode(frame);
+    uint64_t address = (uint64_t)(uintptr_t)code;
+    Py_XDECREF(code);
+    return address;
+}
+#else
+/*
+ * Records up to MAX_STACK_DEPTH frames, starting at `frame` and following
+ * f_back, into trace_entry->stack_info as "name@file:line" strings, and
+ * stores the number of frames written in stack_depth.
+ *
+ * PyUnicode_AsUTF8() can return NULL; "unknown" is substituted so that a NULL
+ * pointer is never passed to the %s conversion.
+ */
+static void capture_stack(PyFrameObject *frame, PyTorchTracingData *trace_entry)
+{
+    PyGILState_STATE gstate = PyGILState_Ensure();
+    int depth = 0;
+    while (frame && depth < MAX_STACK_DEPTH)
+    {
+        const char *name = PyUnicode_AsUTF8(frame->f_code->co_name);
+        const char *file = PyUnicode_AsUTF8(frame->f_code->co_filename);
+        int line = PyFrame_GetLineNumber(frame);
+
+        snprintf(trace_entry->stack_info[depth],
+                 sizeof(trace_entry->stack_info[depth]), "%s@%s:%d",
+                 name ? name : "unknown", file ? file : "unknown", line);
+        frame = frame->f_back;
+        depth++;
+    }
+    trace_entry->stack_depth = depth;
+    PyGILState_Release(gstate);
+}
+
+/* Returns the address of frame->f_code as an integer lookup key. */
+uint64_t getCodeOfFrame(PyFrameObject *frame)
+{
+    const uintptr_t code_pointer = (uintptr_t)(frame->f_code);
+    return (int64_t)code_pointer;
+}
+
+#endif
+
+/*
+ * Returns the gettimeofday() wall-clock time as a single integer:
+ * tv_sec * 1000000 + tv_usec, i.e. microseconds despite the "Ms" in the name.
+ */
+uint64_t getMsTime()
+{
+    struct timeval now;
+    gettimeofday(&now, NULL);
+
+    const uint64_t seconds_part = (uint64_t)now.tv_sec * 1000000;
+    const uint64_t micros_part = (uint64_t)now.tv_usec;
+    return seconds_part + micros_part;
+}
+
+/* Starts the embedded interpreter unless one is already initialised. */
+static void ensure_python_initialized()
+{
+    if (Py_IsInitialized())
+    {
+        return;
+    }
+    Py_Initialize();
+}
+
+/*
+ * Looks up the code object of `frame` in pytorch_tracing_func_map.
+ *
+ * Returns the matching TracingFunction record, or NULL when the frame's code
+ * address was never registered.
+ */
+TracingFunction *isTracedPyTorchFunction(PyFrameObject *frame)
+{
+    uint64_t code_address = getCodeOfFrame(frame);
+    TracingFunction *traced_function = NULL;
+    /* The key is the 8-byte code address, matching the py_code_address field
+     * used by HASH_ADD at registration time. */
+    HASH_FIND(hh, pytorch_tracing_func_map, &code_address, sizeof(int64_t),
+              traced_function);
+    return traced_function;
+}
+
+/*
+ * Profile hook installed through PyEval_SetProfile().
+ *
+ * Frames whose code object is not registered are ignored. For registered
+ * functions a PyTrace_CALL event (while start_tracing is set) stamps the
+ * start time, stage id/type and call stack into the current record; a
+ * PyTrace_RETURN event stamps the end time, closes the record and bumps the
+ * per-tag call counter.
+ *
+ * Always returns 0 so the interpreter never sees an error from the hook.
+ */
+static int profiler(PyObject *obj, PyFrameObject *frame, int what,
+                    PyObject *arg)
+{
+    TracingFunction *func_data = isTracedPyTorchFunction(frame);
+    if (!func_data)
+        return 0;
+    int tag_name = func_data->tag_name;
+    if ((what == PyTrace_CALL) && start_tracing)
+    {
+        /* Only the CALL path needs the stage classification. */
+        int stage_type = determine_stage_type(func_data->function_name);
+
+        pthread_mutex_lock(&mutex);
+        TracingData *tracing_data = receiveTracingData(tag_name);
+        PyTorchTracingDataArray *curr_data = tracing_data->curr_data;
+        if (curr_data->cur >= PY_TRACING_BUFFER_SIZE)
+        {
+            /* Buffer full: hand it to the ready pool and start a fresh one. */
+            systrace_return_pytorch_tracing_data_array(
+                curr_data, PY_TRACING_READY_POOL, tag_name);
+            tracing_data->curr_data =
+                systrace_get_empty_pytorch_tracing_data_array(tag_name);
+            curr_data = tracing_data->curr_data;
+        }
+        curr_data->data[curr_data->cur].start = getMsTime();
+        if (stage_type == DATALOADER)
+        {
+            global_stage_id++;
+        }
+        curr_data->data[curr_data->cur].stage_id = global_stage_id;
+        curr_data->data[curr_data->cur].stage_type = stage_type;
+        global_stage_type = stage_type;
+        capture_stack(frame, &curr_data->data[curr_data->cur]);
+
+        pthread_mutex_unlock(&mutex);
+    }
+    else if (what == PyTrace_RETURN)
+    {
+        pthread_mutex_lock(&mutex);
+        TracingData *tracing_data = receiveTracingData(tag_name);
+        if (start_tracing)
+        {
+            PyTorchTracingDataArray *curr_data = tracing_data->curr_data;
+            /* Nested calls, or a RETURN without a recorded CALL, can arrive
+             * when the buffer is already full; drop the record rather than
+             * write past the end of data[]. */
+            if (curr_data->cur < PY_TRACING_BUFFER_SIZE)
+            {
+                curr_data->data[curr_data->cur].count = tracing_data->count;
+                curr_data->data[curr_data->cur].end = getMsTime();
+                curr_data->cur++;
+            }
+        }
+        tracing_data->count++;
+        pthread_mutex_unlock(&mutex);
+    }
+    return 0;
+}
+
+/*
+ * Formats a printf-style message into a freshly malloc'ed string stored in
+ * *error_message (the caller frees it).
+ *
+ * Always returns 1 so that `return set_error_message(...)` reports failure to
+ * the caller even when the message itself could not be allocated; in that
+ * case *error_message is left NULL.
+ */
+static int set_error_message(char **error_message, const char *format, ...)
+{
+    va_list args;
+
+    *error_message = NULL;
+
+    /* First pass: measure the formatted length. */
+    va_start(args, format);
+    int needed = vsnprintf(NULL, 0, format, args);
+    va_end(args);
+    if (needed < 0)
+    {
+        return 1;
+    }
+
+    size_t size = (size_t)needed + 1;
+    char *buffer = malloc(size);
+    if (!buffer)
+    {
+        return 1;
+    }
+
+    /* Second pass: render into the buffer. */
+    va_start(args, format);
+    vsnprintf(buffer, size, format, args);
+    va_end(args);
+
+    *error_message = buffer;
+    return 1;
+}
+
+/*
+ * Splits `code` on '@' into at most three heap-allocated tokens.
+ *
+ * On success returns 1, stores a malloc'ed array in *tokens and the number of
+ * tokens in *token_count; the caller frees each token and the array. Empty
+ * fields are skipped by strtok_r and anything after the third token is
+ * ignored.
+ *
+ * On allocation failure returns 0 with *tokens set to NULL and *token_count
+ * set to 0; nothing is left for the caller to free.
+ */
+static int parse_input_string(const char *code, char ***tokens, int *token_count)
+{
+    *tokens = NULL;
+    *token_count = 0;
+
+    char *copy = strdup(code);
+    if (!copy)
+        return 0;
+
+    char **parts = malloc(3 * sizeof(char *));
+    if (!parts)
+    {
+        free(copy);
+        return 0;
+    }
+
+    int count = 0;
+    char *saveptr = NULL;
+    for (char *token = strtok_r(copy, "@", &saveptr);
+         token && count < 3;
+         token = strtok_r(NULL, "@", &saveptr))
+    {
+        char *piece = strdup(token);
+        if (!piece)
+        {
+            /* Roll back so the caller never sees a NULL token. */
+            while (count > 0)
+                free(parts[--count]);
+            free(parts);
+            free(copy);
+            return 0;
+        }
+        parts[count++] = piece;
+    }
+
+    free(copy);
+    *tokens = parts;
+    *token_count = count;
+    return 1;
+}
+
+/*
+ * Builds the Python snippet that resolves a traced function and leaves
+ * `address` and `is_native` in the executing namespace.
+ *
+ * code        - the original "@"-separated identifier.
+ * tokens      - the identifier split on '@'.
+ * token_count - 3: "from T0 import T1 as mm; obj = getattr(mm, 'T2')";
+ *               2: "from T0 import T1 as obj";
+ *               otherwise: look `code` up in globals().
+ *
+ * Returns a malloc'ed string the caller frees, or NULL when formatting fails.
+ * NOTE(review): the tokens are spliced into Python source unescaped; this
+ * presumably relies on the names coming from trusted configuration.
+ */
+static char *build_python_code(const char *code, char **tokens, int token_count)
+{
+    const char *template =
+        "try:\n"
+        "    obj = None\n"
+        "%s\n"
+        "    while hasattr(obj, '__wrapped__'):\n"
+        "        obj = getattr(obj, '__wrapped__')\n"
+        "    if hasattr(obj, '__code__'):\n"
+        "        address = id(obj.__code__)\n"
+        "        is_native = 0\n"
+        "    else:\n"
+        "        address = id(obj)\n"
+        "        is_native = 1\n"
+        "except Exception as e:\n"
+        "    raise\n";
+
+    char *import_part = NULL;
+    int written;
+    if (token_count == 3)
+    {
+        written = asprintf(&import_part,
+            "    from %s import %s as mm\n"
+            "    obj = getattr(mm, '%s')",
+            tokens[0], tokens[1], tokens[2]);
+    }
+    else if (token_count == 2)
+    {
+        written = asprintf(&import_part,
+            "    from %s import %s as obj",
+            tokens[0], tokens[1]);
+    }
+    else
+    {
+        written = asprintf(&import_part,
+            "    obj = globals().get('%s')\n"
+            "    if obj is None:\n"
+            "        raise ValueError('Global object not found: %s')",
+            code, code);
+    }
+    /* On failure asprintf leaves the output pointer unspecified, so it must
+     * not be used or freed. */
+    if (written < 0)
+    {
+        return NULL;
+    }
+
+    char *python_code = NULL;
+    written = asprintf(&python_code, template, import_part);
+    free(import_part);
+    if (written < 0)
+    {
+        return NULL;
+    }
+
+    return python_code;
+}
+
+/*
+ * Runs `python_code` and reads the `address` and `is_native` variables it
+ * leaves in its local namespace.
+ *
+ * use_globals   - non-zero: run against the globals of the currently
+ *                 executing Python frame (borrowed, may be unavailable);
+ *                 zero: run against a fresh, private globals dict.
+ * address       - receives the integer value of `address`.
+ * is_native     - receives the integer value of `is_native`.
+ * error_message - receives a malloc'ed description on every failure path.
+ *
+ * Returns 0 on success and 1 on failure. No Python exception is left pending
+ * on return.
+ */
+static int execute_python_code(const char *python_code, int use_globals,
+                               int64_t *address, int *is_native,
+                               char **error_message)
+{
+    int status = 1;
+    PyObject *result = NULL;
+    PyObject *py_address = NULL;
+    PyObject *py_is_native = NULL;
+    long long raw_address = 0;
+    long long raw_is_native = 0;
+
+    PyObject *globals = use_globals ? PyEval_GetGlobals() : PyDict_New();
+    PyObject *locals = PyDict_New();
+
+    if (!globals || !locals)
+    {
+        /* A failed PyDict_New leaves an exception set. */
+        PyErr_Clear();
+        set_error_message(error_message, "Failed to create Python dictionaries");
+        goto cleanup;
+    }
+
+    result = PyRun_String(python_code, Py_file_input, globals, locals);
+    if (!result)
+    {
+        PyObject *ptype = NULL, *pvalue = NULL, *ptraceback = NULL;
+        PyErr_Fetch(&ptype, &pvalue, &ptraceback);
+        PyErr_NormalizeException(&ptype, &pvalue, &ptraceback);
+
+        PyObject *py_str = pvalue ? PyObject_Str(pvalue) : NULL;
+        const char *str_error = py_str ? PyUnicode_AsUTF8(py_str) : NULL;
+        /* Always leave a message, even when the exception has no text. */
+        set_error_message(error_message, "Python error: %s",
+                          str_error ? str_error : "Unknown error");
+
+        Py_XDECREF(py_str);
+        Py_XDECREF(ptype);
+        Py_XDECREF(pvalue);
+        Py_XDECREF(ptraceback);
+        PyErr_Clear();
+        goto cleanup;
+    }
+    Py_DECREF(result);
+
+    /* Borrowed references owned by `locals`. */
+    py_address = PyDict_GetItemString(locals, "address");
+    py_is_native = PyDict_GetItemString(locals, "is_native");
+
+    if (!py_address || !py_is_native)
+    {
+        set_error_message(error_message,
+                          "Failed to get address or is_native from execution");
+        goto cleanup;
+    }
+
+    raw_address = PyLong_AsLongLong(py_address);
+    raw_is_native = PyLong_AsLongLong(py_is_native);
+    if (PyErr_Occurred())
+    {
+        /* Non-integer or out-of-range value: do not hand back garbage. */
+        PyErr_Clear();
+        set_error_message(error_message,
+                          "Failed to convert address or is_native to an integer");
+        goto cleanup;
+    }
+
+    *address = raw_address;
+    *is_native = (int)raw_is_native;
+    status = 0;
+
+cleanup:
+    /* Frame globals are borrowed; only a dict created here is released. */
+    if (!use_globals)
+    {
+        Py_XDECREF(globals);
+    }
+    Py_XDECREF(locals);
+    return status;
+}
+
+/*
+ * Resolves a "@"-separated function identifier to the address of its Python
+ * code object (or of the object itself when it has no __code__).
+ *
+ * code          - identifier such as "pkg.mod@Class@method", "pkg@func" or a
+ *                 bare global name.
+ * error_message - receives a malloc'ed status string: an error description on
+ *                 failure, or a confirmation message on success.
+ * address       - receives the resolved address (0 on failure).
+ * is_native     - receives 1 when the object has no __code__, else 0.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int GetFuncAddressByPython(const char *code, char **error_message,
+                                  int64_t *address, int *is_native)
+{
+    *error_message = NULL;
+    *address = 0;
+    *is_native = 0;
+
+    if (!code || !*code)
+    {
+        return set_error_message(error_message, "Empty or NULL code parameter");
+    }
+
+    char **tokens = NULL;
+    int token_count = 0;
+    if (!parse_input_string(code, &tokens, &token_count))
+    {
+        return set_error_message(error_message, "Failed to parse input string");
+    }
+
+    char *python_code = build_python_code(code, tokens, token_count);
+    if (!python_code)
+    {
+        for (int i = 0; i < token_count; i++)
+            free(tokens[i]);
+        free(tokens);
+        return set_error_message(error_message, "Failed to build Python code");
+    }
+
+    /* build_python_code emits a globals() lookup for anything that is not a
+     * two- or three-part identifier, so those cases must run against the
+     * real frame globals; a private empty dict could never contain the name. */
+    int use_globals = (token_count < 2);
+    int result = execute_python_code(python_code, use_globals, address,
+                                     is_native, error_message);
+
+    free(python_code);
+    for (int i = 0; i < token_count; i++)
+        free(tokens[i]);
+    free(tokens);
+
+    if (result == 0)
+    {
+        set_error_message(error_message, "Get __code__ attribute for '%s' OK", code);
+    }
+
+    return result;
+}
+/* Returns the slot of pytorch_tracing_data_array that belongs to tag `name`.
+ * No range check is performed. */
+static TracingData *receiveTracingData(int name)
+{
+    return &pytorch_tracing_data_array[name];
+}
+
+/* Initialises the slot for tag `name`: stores the tag, a private copy of
+ * `func_name`, and an empty record buffer obtained for that tag. */
+static void addTracingData(int name, const char *func_name)
+{
+    TracingData *slot = receiveTracingData(name);
+
+    slot->tag_name = name;
+    slot->curr_data = systrace_get_empty_pytorch_tracing_data_array(name);
+    slot->function_name = strdup(func_name);
+}
+
+/*
+ * Copies the "collected" and "uncollectable" entries of the GC info dict into
+ * data->payload.gc_debug[0] and [1]. A missing or non-integer entry is stored
+ * as -1. Nothing is written when `info` is not a dict.
+ */
+static void getGcInfo(PyTorchTracingData *data, PyObject *info)
+{
+    if (!PyDict_Check(info))
+        return;
+
+    PyObject *entries[2];
+    entries[0] = PyDict_GetItemString(info, "collected");
+    entries[1] = PyDict_GetItemString(info, "uncollectable");
+
+    for (int slot = 0; slot < 2; slot++)
+    {
+        PyObject *value = entries[slot];
+        if (value != NULL && PyLong_Check(value))
+        {
+            data->payload.gc_debug[slot] = PyLong_AsLong(value);
+            continue;
+        }
+        data->payload.gc_debug[slot] = -1;
+    }
+}
+
+/*
+ * Handles one gc.callbacks notification.
+ *
+ * phase - "start" or "stop".
+ * info  - the GC info dict (read on "stop" for the collected counters).
+ *
+ * "start" stamps the start time of the current GC record (rotating the
+ * buffer first when it is full). "stop" fills in the counter, payload, stage
+ * id and end time, closes the record, and bumps the GC call counter.
+ *
+ * The mutex is taken once on entry and released exactly once on exit, and
+ * each "stop" closes exactly one record.
+ */
+static void gcCallback(PyObject *phase, PyObject *info)
+{
+    pthread_mutex_lock(&mutex);
+    if (PyUnicode_CompareWithASCIIString(phase, "start") == 0 && start_tracing)
+    {
+        TracingData *tracing_data = receiveTracingData(PY_TRACING_GC);
+        PyTorchTracingDataArray *curr_data = tracing_data->curr_data;
+        if (curr_data->cur >= PY_TRACING_BUFFER_SIZE)
+        {
+            systrace_return_pytorch_tracing_data_array(
+                curr_data, PY_TRACING_READY_POOL, PY_TRACING_GC);
+            tracing_data->curr_data =
+                systrace_get_empty_pytorch_tracing_data_array(PY_TRACING_GC);
+            curr_data = tracing_data->curr_data;
+        }
+        curr_data->data[curr_data->cur].start = getMsTime();
+    }
+    else if (PyUnicode_CompareWithASCIIString(phase, "stop") == 0)
+    {
+        TracingData *tracing_data = receiveTracingData(PY_TRACING_GC);
+        if (start_tracing)
+        {
+            PyTorchTracingDataArray *curr_data = tracing_data->curr_data;
+            /* A "stop" can arrive without a recorded "start" (tracing was
+             * toggled in between), so the slot may be past the end. */
+            if (curr_data->cur < PY_TRACING_BUFFER_SIZE)
+            {
+                PyTorchTracingData *entry = &curr_data->data[curr_data->cur];
+                entry->count = tracing_data->count;
+                entry->type = PAYLOAD_GC;
+                entry->stage_id = global_stage_id;
+                getGcInfo(entry, info);
+                entry->end = getMsTime();
+                curr_data->cur++;
+            }
+        }
+        tracing_data->count++;
+    }
+    pthread_mutex_unlock(&mutex);
+}
+
+/*
+ * tp_call entry point of the GC callback object: unpacks the two positional
+ * arguments (phase, info), forwards them to gcCallback and returns None.
+ * Returns NULL when the arguments cannot be parsed.
+ */
+static PyObject *gcCallbackWrapper(PyObject *self, PyObject *args,
+                                   PyObject *kwargs)
+{
+    PyObject *phase;
+    PyObject *info;
+
+    if (PyArg_ParseTuple(args, "OO", &phase, &info))
+    {
+        gcCallback(phase, info);
+        Py_RETURN_NONE;
+    }
+    return NULL;
+}
+
+/*
+ * Statically defined Python type named "gc_callback".
+ *
+ * The only populated slots are tp_name, tp_basicsize (a bare PyObject, no
+ * extra state), tp_call (gcCallbackWrapper) and tp_flags; every other slot is
+ * zero. systrace_register_gc() creates an instance of this type with
+ * PyObject_New() and appends it to gc.callbacks.
+ */
+static PyTypeObject GcCallbackType = {
+    PyVarObject_HEAD_INIT(NULL, 0) "gc_callback", /* tp_name */
+    sizeof(PyObject),                             /* tp_basicsize */
+    0,                                            /* tp_itemsize */
+    0,                                            /* tp_dealloc */
+    0,                                            /* tp_vectorcall_offset */
+    0,                                            /* tp_getattr */
+    0,                                            /* tp_setattr */
+    0,                                            /* tp_as_async */
+    0,                                            /* tp_repr */
+    0,                                            /* tp_as_number */
+    0,                                            /* tp_as_sequence */
+    0,                                            /* tp_as_mapping */
+    0,                                            /* tp_hash  */
+    gcCallbackWrapper,                            /* tp_call */
+    0,                                            /* tp_str */
+    0,                                            /* tp_getattro */
+    0,                                            /* tp_setattro */
+    0,                                            /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,                           /* tp_flags */
+    0,                                            /* tp_doc */
+    0,                                            /* tp_traverse */
+    0,                                            /* tp_clear */
+    0,                                            /* tp_richcompare */
+    0,                                            /* tp_weaklistoffset */
+    0,                                            /* tp_iter */
+    0,                                            /* tp_iternext */
+    0,                                            /* tp_methods */
+    0,                                            /* tp_members */
+    0,                                            /* tp_getset */
+    0,                                            /* tp_base */
+    0,                                            /* tp_dict */
+    0,                                            /* tp_descr_get */
+    0,                                            /* tp_descr_set */
+    0,                                            /* tp_dictoffset */
+    0,                                            /* tp_init */
+    0,                                            /* tp_alloc */
+    0,                                            /* tp_new */
+};
+
+/*
+ * Detaches and returns the partially filled record buffer of tag `name`,
+ * replacing it with a fresh empty buffer.
+ *
+ * Returns NULL when tracing has not been set up, `name` is outside the
+ * registered range, the tag has no buffer, or the buffer holds no records.
+ */
+PyTorchTracingDataArray *
+systrace_get_partial_pytorch_tracing_data_array(int name)
+{
+    PyTorchTracingDataArray *result = NULL;
+
+    pthread_mutex_lock(&mutex);
+    /* Validate before indexing: base + name is never NULL for name > 0, so a
+     * NULL check on the computed slot alone cannot catch a bad tag. */
+    if (pytorch_tracing_data_array != NULL && name >= 0 &&
+        name < tracing_data_count)
+    {
+        TracingData *tracing_data = receiveTracingData(name);
+        if (tracing_data->curr_data && tracing_data->curr_data->cur != 0)
+        {
+            result = tracing_data->curr_data;
+            tracing_data->curr_data =
+                systrace_get_empty_pytorch_tracing_data_array(name);
+        }
+    }
+    pthread_mutex_unlock(&mutex);
+    return result;
+}
+
+/*
+ * Sets up the GC tracing slot and hooks a callback object into gc.callbacks.
+ *
+ * error_message - on success receives a strdup'ed "Import gc Ok"; left
+ *                 untouched on failure.
+ *
+ * Every failure path clears the pending Python exception so that later C-API
+ * calls made by the caller do not run with a stale error set.
+ */
+void systrace_register_gc(char **error_message)
+{
+    addTracingData(PY_TRACING_GC, "GC");
+
+    /* A static type must be finalised before instances are created. */
+    if (PyType_Ready(&GcCallbackType) < 0)
+    {
+        PyErr_Clear();
+        return;
+    }
+
+    PyObject *gc_module = PyImport_ImportModule("gc");
+    if (!gc_module)
+    {
+        PyErr_Clear();
+        return;
+    }
+
+    PyObject *callbacks_list = PyObject_GetAttrString(gc_module, "callbacks");
+    if (!callbacks_list || !PyList_Check(callbacks_list))
+    {
+        PyErr_Clear();
+        Py_XDECREF(callbacks_list);
+        Py_DECREF(gc_module);
+        return;
+    }
+
+    PyObject *py_callback = PyObject_New(PyObject, &GcCallbackType);
+
+    if (!py_callback)
+    {
+        PyErr_Clear();
+        Py_DECREF(callbacks_list);
+        Py_DECREF(gc_module);
+        return;
+    }
+
+    int append_status = PyList_Append(callbacks_list, py_callback);
+    /* On success the list holds its own reference; ours is dropped either way. */
+    Py_DECREF(py_callback);
+    Py_DECREF(callbacks_list);
+    Py_DECREF(gc_module);
+
+    if (append_status != 0)
+    {
+        PyErr_Clear();
+        return;
+    }
+
+    *error_message = strdup("Import gc Ok");
+}
+
+/*
+ * Allocates the zero-initialised per-tag TracingData array with `count`
+ * slots and records its size in tracing_data_count.
+ *
+ * When `count` is not positive or the allocation fails, the array pointer is
+ * left NULL and tracing_data_count is 0, so callers can detect the failure.
+ */
+static void init_tracing_data_array(int count)
+{
+    tracing_data_count = 0;
+    pytorch_tracing_data_array = NULL;
+    if (count <= 0)
+    {
+        return;
+    }
+
+    /* calloc zeroes the memory and checks count * size for overflow. */
+    pytorch_tracing_data_array =
+        (TracingData *)calloc((size_t)count, sizeof(TracingData));
+    if (pytorch_tracing_data_array != NULL)
+    {
+        tracing_data_count = count;
+    }
+}
+
+/*
+ * Entry point that sets up Python-level tracing.
+ *
+ * names  - `count` function identifiers; index 0 is the GC slot
+ *          (PY_TRACING_GC) and is not resolved as a Python function.
+ * count  - number of entries in `names` and `errors`; must be at least 1.
+ * errors - `count` message slots; each receives a malloc'ed status string or
+ *          stays untouched.
+ *
+ * Does nothing when the arguments are unusable or the tracing array cannot
+ * be allocated.
+ */
+void systrace_register_tracing(const char **names, int count, char **errors)
+{
+    /* Slot 0 is written unconditionally by the GC registration, so an empty
+     * request would write out of bounds. */
+    if (names == NULL || errors == NULL || count < 1)
+    {
+        return;
+    }
+
+    ensure_python_initialized();
+
+    PyGILState_STATE gstate = PyGILState_Ensure();
+
+    init_tracing_data_array(count);
+    if (pytorch_tracing_data_array != NULL)
+    {
+        systrace_register_gc(errors);
+
+        for (int i = 1; i < count; i++)
+        {
+            register_tracing_function(names[i], i, errors);
+        }
+
+        set_profiler_for_all_threads();
+    }
+
+    PyGILState_Release(gstate);
+}
\ No newline at end of file
diff --git a/systrace/src/trace/python/pytorch_tracing.h b/systrace/src/trace/python/pytorch_tracing.h
new file mode 100644
index 0000000000000000000000000000000000000000..5209b2886cfb5d4c247c4b3becf01d076044e4a6
--- /dev/null
+++ b/systrace/src/trace/python/pytorch_tracing.h
@@ -0,0 +1,69 @@
+#include <Python.h>
+#include <frameobject.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/time.h>
+
+#include "../../../include/common/shared_constants.h"
+#include "../../../thirdparty/uthash.h"
+#include "pytorch_tracing_data.h"
+
+/*
+ * Exported C-linkage entry points. All take an integer tag (`name`) that
+ * selects the per-function tracing slot.
+ */
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+    /* Obtains an empty record buffer for the given tag. */
+    __attribute__((visibility("default"))) PyTorchTracingDataArray *
+    systrace_get_empty_pytorch_tracing_data_array(int);
+    /* Obtains a buffer from the ready pool of the given tag. */
+    __attribute__((visibility("default"))) PyTorchTracingDataArray *
+    systrace_get_full_pytorch_tracing_data_array(int);
+
+    /* Detaches the in-progress buffer of the given tag; NULL when it holds
+     * no records. */
+    __attribute__((visibility("default"))) PyTorchTracingDataArray *
+    systrace_get_partial_pytorch_tracing_data_array(int);
+
+    /* Hands a buffer back to the pool selected by `type`
+     * (PY_TRACING_READY_POOL or PY_TRACING_EMPTY_POOL). */
+    __attribute__((visibility("default"))) void
+    systrace_return_pytorch_tracing_data_array(PyTorchTracingDataArray *,
+                                               int type, int name);
+    /* Registers the named functions for tracing; arguments are the name
+     * array, its length, and a parallel array of message slots. */
+    __attribute__((visibility("default"))) void
+    systrace_register_tracing(const char **, int, char **);
+#ifdef __cplusplus
+}
+#endif
+/* Hash-table entry describing one registered function, keyed by the address
+ * of its Python code object. */
+typedef struct
+{
+    int64_t py_code_address;   /* hash key: address obtained at registration */
+    const char *function_name; /* heap copy of the registered identifier */
+    int tag_name;              /* index of the matching TracingData slot */
+    int is_native;             /* value reported by the address resolver */
+    UT_hash_handle hh;         /* uthash bookkeeping */
+} TracingFunction;
+
+/* Per-tag tracing state stored in pytorch_tracing_data_array. */
+typedef struct
+{
+    int tag_name;                       /* index of this slot */
+    PyTorchTracingDataArray *curr_data; /* buffer currently being filled */
+    int64_t count;                      /* incremented on every return/stop event */
+    const char *function_name;          /* heap copy of the registered name */
+} TracingData;
+
+/*
+ * File-local declarations and state used by pytorch_tracing.c.
+ * NOTE(review): this header has no include guard and defines `static`
+ * objects, so every translation unit that includes it gets its own private
+ * copy of the state below — confirm this is intended.
+ */
+typedef struct _frame PyFrameObject;
+/* Returns the address of the code object executed by `frame`. */
+uint64_t getCodeOfFrame(PyFrameObject *frame);
+/* Writes the call stack starting at `frame` into `trace_entry`. */
+static void capture_stack(PyFrameObject *frame,
+                          PyTorchTracingData *trace_entry);
+
+/* Serialises access to the tracing buffers. */
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Array of per-tag tracing slots, indexed by tag. */
+static TracingData *pytorch_tracing_data_array = NULL;
+
+/* uthash head mapping code-object addresses to TracingFunction records. */
+static TracingFunction *pytorch_tracing_func_map = NULL;
+/* Non-zero while records should be captured. */
+static int start_tracing = 1;
+/* Number of slots in pytorch_tracing_data_array. */
+static int tracing_data_count = 0;
+
+static int GetFuncAddressByPython(const char *input, char **error_message,
+                                  int64_t *code_address, int *is_native);
+static uint64_t getMsTime();
+static TracingFunction *isTracedPyTorchFunction(PyFrameObject *frame);
+static TracingData *receiveTracingData(int name);
+static void addTracingData(int name, const char *func_name);
+static int profiler(PyObject *obj, PyFrameObject *frame, int what,
+                    PyObject *arg);
\ No newline at end of file
diff --git a/systrace/src/trace/python/pytorch_tracing_data.h b/systrace/src/trace/python/pytorch_tracing_data.h
new file mode 100644
index 0000000000000000000000000000000000000000..f8a601df9eaae87e866ba9fe32bfa140c7ab8ddb
--- /dev/null
+++ b/systrace/src/trace/python/pytorch_tracing_data.h
@@ -0,0 +1,54 @@
+#pragma once
+#include <stdint.h>
+
+/*
+ * Capacity limits. PY_TRACING_BUFFER_SIZE is the number of records in one
+ * PyTorchTracingDataArray and may be predefined by the build.
+ * NOTE(review): PY_TRACING_MAX_THREADS sits inside the same #ifndef, so it is
+ * left undefined when PY_TRACING_BUFFER_SIZE is predefined — confirm.
+ */
+#ifndef PY_TRACING_BUFFER_SIZE
+#define PY_TRACING_BUFFER_SIZE 512
+#define PY_TRACING_MAX_THREADS 256
+#endif
+/* Pool selectors passed as `type` when returning a buffer. */
+#define PY_TRACING_READY_POOL 0
+#define PY_TRACING_EMPTY_POOL 1
+/* Tag indices; PY_TRACING_GC is the slot used for garbage-collection records. */
+#define PY_TRACING_GC 0
+#define PY_DATALOADER 1
+
+/* Limits for the captured call stack: frames per record and bytes per frame. */
+#define MAX_STACK_DEPTH 32
+#define MAX_STACK_FRAME_LENGTH 256
+
+/* Discriminator telling which member of Payload a record carries. */
+typedef enum
+{
+    PAYLOAD_UNINITIALIZED = 0, /* value of a zero-filled record */
+    PAYLOAD_GC = 1,            /* payload.gc_debug is populated */
+} PayloadType;
+
+/* Stage categories assigned to traced functions by their registered name. */
+typedef enum
+{
+    UNKNOWN = 0, /* name not recognised (also the zero-filled default) */
+    DATALOADER,
+    FORWARD,
+    BACKWARD,
+    SYNCHRONIZATION,
+    GC,
+} Stagetype;
+
+/* Extra per-record data; the active member is identified by PayloadType. */
+typedef union
+{
+    /* GC counters: [0] = "collected", [1] = "uncollectable"; -1 when the
+     * value is unavailable. */
+    int gc_debug[2];
+} Payload;
+
+/* One traced event: timing, stage classification, optional payload and the
+ * call stack captured when the event started. */
+typedef struct
+{
+    /* Timestamps taken from getMsTime() at the start and end of the event. */
+    uint64_t start;
+    uint64_t end;
+    /* Value of the per-tag call counter when the event ended. */
+    uint32_t count;
+    /* Global stage id in effect when the event was recorded. */
+    uint32_t stage_id;
+    Stagetype stage_type;
+    Payload payload;
+    /* Tells which member of `payload` is valid. */
+    PayloadType type;
+    /* "name@file:line" strings, innermost frame first. */
+    char stack_info[MAX_STACK_DEPTH][MAX_STACK_FRAME_LENGTH];
+    /* Number of entries of stack_info that are filled in. */
+    int stack_depth;
+} PyTorchTracingData;
+
+/* Fixed-capacity buffer of trace records. */
+typedef struct
+{
+    PyTorchTracingData data[PY_TRACING_BUFFER_SIZE];
+    uint64_t cur; /* index of the record currently being written */
+} PyTorchTracingDataArray;
\ No newline at end of file
diff --git a/systrace/src/trace/python/pytorch_tracing_loader.cc b/systrace/src/trace/python/pytorch_tracing_loader.cc
new file mode 100644
index 0000000000000000000000000000000000000000..fab92ffbc4068dcf0446108f8e5d379faa8c1944
--- /dev/null
+++ b/systrace/src/trace/python/pytorch_tracing_loader.cc
@@ -0,0 +1,121 @@
+#include "pytorch_tracing_loader.h"
+#include "../../../include/common/logging.h"
+#include <cstring>
+#include <dlfcn.h>
+
+namespace systrace
+{
+namespace pytorch_tracing
+{
+
+// Constructs the loader for the tracing library at `library_path` and, when
+// the base class holds a library handle, resolves the exported entry points.
+PyTorchTracingLibrary::PyTorchTracingLibrary(const std::string &library_path)
+    : DynamicLibraryLoader(library_path), register_tracing_(nullptr),
+      get_tracing_data_(nullptr), get_partial_tracing_data_(nullptr),
+      return_tracing_data_(nullptr)
+{
+    if (library_handle_ == nullptr)
+    {
+        return;
+    }
+    InitializeSymbols();
+}
+
+// Resolves every exported entry point into its member function pointer.
+// Resolution stops at the first symbol that cannot be loaded; is_usable_ ends
+// up true only when all of them were found.
+void PyTorchTracingLibrary::InitializeSymbols()
+{
+    // Each entry pairs a symbol name with a callback that yields the address
+    // of the member pointer to fill in, plus a label used in log output.
+    const std::vector<SymbolConfig> symbol_table = {
+        {"systrace_register_tracing",
+         [this]() { return reinterpret_cast<void *>(&register_tracing_); },
+         "TracingRegistrationFunc"},
+
+        {"systrace_get_full_pytorch_tracing_data_array",
+         [this]() { return reinterpret_cast<void *>(&get_tracing_data_); },
+         "DataArrayRetrievalAllFunc"},
+
+        {"systrace_return_pytorch_tracing_data_array",
+         [this]() { return reinterpret_cast<void *>(&return_tracing_data_); },
+         "DataArrayReleaseFunc"},
+
+        {"systrace_get_partial_pytorch_tracing_data_array", [this]()
+         { return reinterpret_cast<void *>(&get_partial_tracing_data_); },
+         "GetPartialTracingDataArrayPartFunc"}};
+
+    bool every_symbol_loaded = true;
+    for (const SymbolConfig &entry : symbol_table)
+    {
+        if (!LoadSymbol(entry))
+        {
+            every_symbol_loaded = false;
+            break;
+        }
+    }
+    is_usable_ = every_symbol_loaded;
+}
+
+// Resolves config.name in the loaded library and stores the address through
+// the pointer returned by config.loader().
+//
+// Returns true on success. On failure logs a warning that includes the
+// dlerror() text (or "Unknown error" when none is available) and returns
+// false without touching the target pointer.
+bool PyTorchTracingLibrary::LoadSymbol(const SymbolConfig &config)
+{
+    // Clear any stale error so the message below belongs to this dlsym().
+    dlerror();
+
+    void *symbol = dlsym(library_handle_, config.name);
+    if (!symbol)
+    {
+        // dlerror() may return nullptr; streaming a null char* is undefined.
+        const char *error_message = dlerror();
+        STLOG(WARNING) << "Failed to load symbol: " << config.name
+                       << " (type: " << config.type_name << "), error: "
+                       << (error_message ? error_message : "Unknown error");
+        return false;
+    }
+
+    *reinterpret_cast<void **>(config.loader()) = symbol;
+    return true;
+}
+
+// Registers `names` with the tracing library and returns the status strings
+// it produced, in slot order, skipping slots that were left empty.
+// Returns an empty vector when the library is not usable.
+std::vector<std::string>
+PyTorchTracingLibrary::Register(const std::vector<std::string> &names)
+{
+    if (!is_usable_)
+    {
+        return {};
+    }
+
+    const size_t name_count = names.size();
+
+    // The library fills the slots with malloc'ed strings; the deleter frees
+    // each of them before releasing the array itself.
+    std::function<void(char **)> release_messages =
+        [name_count](char **messages)
+    {
+        for (size_t slot = 0; slot < name_count; ++slot)
+        {
+            free(messages[slot]);
+        }
+        delete[] messages;
+    };
+    std::unique_ptr<char *[], std::function<void(char **)>> error_holder(
+        new char *[name_count], release_messages);
+    std::memset(error_holder.get(), 0, name_count * sizeof(char *));
+
+    // Borrowed C-string views into `names`, valid for the call below.
+    std::vector<const char *> c_str_array;
+    c_str_array.reserve(name_count);
+    for (const std::string &name : names)
+    {
+        c_str_array.push_back(name.c_str());
+    }
+
+    register_tracing_(c_str_array.data(), c_str_array.size(),
+                      error_holder.get());
+
+    std::vector<std::string> result;
+    for (size_t slot = 0; slot < name_count; ++slot)
+    {
+        const char *message = error_holder[slot];
+        if (message != nullptr)
+        {
+            result.emplace_back(message);
+        }
+    }
+    return result;
+}
+
+// Forwards to the library's "get full" entry point for tag `name`;
+// returns nullptr when the library is not usable.
+PyTorchTracingDataArray *PyTorchTracingLibrary::RetrieveAllTracingData(int name)
+{
+    if (!is_usable_)
+    {
+        return nullptr;
+    }
+    return get_tracing_data_(name);
+}
+
+// Forwards to the library's "get partial" entry point for tag `name`;
+// returns nullptr when the library is not usable.
+PyTorchTracingDataArray *
+PyTorchTracingLibrary::RetrievePartialTracingData(int name)
+{
+    if (!is_usable_)
+    {
+        return nullptr;
+    }
+    return get_partial_tracing_data_(name);
+}
+
+// Hands `data` back to the library pool selected by `type` for tag `name`.
+// Does nothing when the library is not usable or `data` is null.
+void PyTorchTracingLibrary::ReleaseTracingData(PyTorchTracingDataArray *data,
+                                               int type, int name)
+{
+    if (!is_usable_ || data == nullptr)
+    {
+        return;
+    }
+    return_tracing_data_(data, type, name);
+}
+
+} // namespace pytorch_tracing
+} // namespace systrace
\ No newline at end of file
diff --git a/systrace/src/trace/python/pytorch_tracing_loader.h b/systrace/src/trace/python/pytorch_tracing_loader.h
new file mode 100644
index 0000000000000000000000000000000000000000..9d5aaa49855cd9314f92bd69c622b8d5ed88f5d7
--- /dev/null
+++ b/systrace/src/trace/python/pytorch_tracing_loader.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "../../../include/common/macro.h"
+#include "../library_loader.h"
+#include "pytorch_tracing_data.h"
+
+namespace systrace
+{
+namespace pytorch_tracing
+{
+
+// Loader for the Python tracing shared library. Resolves the exported
+// systrace_* entry points and exposes them as member functions that are
+// no-ops (or return empty/null) when the library is not usable.
+class PyTorchTracingLibrary : public DynamicLibraryLoader
+{
+  public:
+    explicit PyTorchTracingLibrary(const std::string &);
+    // Signatures of the entry points resolved from the library.
+    using TracingRegistrationFunc = void (*)(const char **, int, char **);
+    using DataArrayRetrievalAllFunc = PyTorchTracingDataArray *(*)(int);
+    using GetPartialTracingDataArrayPartFunc =
+        PyTorchTracingDataArray *(*)(int);
+    using DataArrayReleaseFunc = void (*)(PyTorchTracingDataArray *, int, int);
+    // Fetches a buffer through the "get full" entry point for the given tag.
+    PyTorchTracingDataArray *RetrieveAllTracingData(int);
+    // Fetches the in-progress buffer for the given tag.
+    PyTorchTracingDataArray *RetrievePartialTracingData(int);
+    // Registers function names for tracing; returns the status strings
+    // produced by the library.
+    std::vector<std::string> Register(const std::vector<std::string> &names);
+    // Returns `data` to the pool selected by `type` for tag `name`.
+    void ReleaseTracingData(PyTorchTracingDataArray *data, int type, int name);
+
+  private:
+    // Resolved entry points; nullptr until InitializeSymbols() succeeds.
+    TracingRegistrationFunc register_tracing_;
+    DataArrayRetrievalAllFunc get_tracing_data_;
+    GetPartialTracingDataArrayPartFunc get_partial_tracing_data_;
+    DataArrayReleaseFunc return_tracing_data_;
+    // Resolves all entry points and updates is_usable_.
+    void InitializeSymbols();
+    // Describes one symbol to resolve.
+    // NOTE(review): std::function is used here but <functional> is not
+    // included by this header directly — presumably it arrives transitively.
+    struct SymbolConfig
+    {
+        const char *name;                   // exported symbol name
+        std::function<void *(void)> loader; // yields the address of the member pointer to fill
+        const char *type_name;              // label used in log messages
+    };
+    // Resolves a single symbol; returns false when it is missing.
+    bool LoadSymbol(const SymbolConfig &config);
+};
+
+} // namespace pytorch_tracing
+} // namespace systrace
\ No newline at end of file
diff --git a/systrace/src/trace/python/pytorch_tracing_manager.cc b/systrace/src/trace/python/pytorch_tracing_manager.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3be99ca325ce956898381905de654cecf18ae193
--- /dev/null
+++ b/systrace/src/trace/python/pytorch_tracing_manager.cc
@@ -0,0 +1,58 @@
+#include "pytorch_tracing_manager.h"
+#include "pytorch_tracing_data.h"
+#include <cstring>
+#include <thread>
+
+namespace systrace
+{
+namespace pytorch_tracing_manager
+{
+
+// Returns the process-wide manager, creating it on first use via
+// std::call_once.
+PyTorchTracingManager &PyTorchTracingManager::getInstance()
+{
+    std::call_once(init_flag_, []() { PyTorchTracingManager::initSingleton(); });
+
+    PyTorchTracingManager *manager = instance_;
+    return *manager;
+}
+
+// Allocates the singleton and stores it in instance_. getInstance() invokes
+// this through std::call_once; the object is never deleted in this file.
+void PyTorchTracingManager::initSingleton()
+{
+    instance_ = new PyTorchTracingManager();
+}
+
+// Takes a buffer from the empty pool of tag `name`, zero-fills it and
+// returns it. The pool entry for the tag is created on first access.
+PyTorchTracingDataArray *
+PyTorchTracingManager::getEmptyPyTorchTracingDataArray(int name)
+{
+    PyTorchTracingDataArray *buffer = pool_[name].empty_pool.getObject();
+    std::memset(buffer, 0, sizeof(PyTorchTracingDataArray));
+    return buffer;
+}
+
+// Puts `array` into the ready or empty pool of tag `name`, as selected by
+// `type`. A null array is ignored; an unrecognised `type` only creates the
+// pool entry for the tag and leaves the array untouched.
+void PyTorchTracingManager::returnPyTorchTracingDataArray(
+    PyTorchTracingDataArray *array, int type, int name)
+{
+    if (array == nullptr)
+    {
+        return;
+    }
+
+    auto &tag_pools = pool_[name];
+    int queue_size = 0;
+
+    if (type == PY_TRACING_READY_POOL)
+    {
+        tag_pools.ready_pool.returnObject(array, &queue_size);
+    }
+    else if (type == PY_TRACING_EMPTY_POOL)
+    {
+        tag_pools.empty_pool.returnObject(array, &queue_size);
+    }
+}
+
+// Fetches a buffer from the ready pool of tag `name` using the pool's
+// getObject<false>() variant.
+PyTorchTracingDataArray *
+PyTorchTracingManager::getPyTorchTracingDataArray(int name)
+{
+    auto &tag_pools = pool_[name];
+    return tag_pools.ready_pool.getObject<false>();
+}
+} // namespace pytorch_tracing_manager
+} // namespace systrace
\ No newline at end of file
diff --git a/systrace/src/trace/python/pytorch_tracing_manager.h b/systrace/src/trace/python/pytorch_tracing_manager.h
new file mode 100644
index 0000000000000000000000000000000000000000..ead4e5a72690b17ee7a66ad0c93c464afcc90a72
--- /dev/null
+++ b/systrace/src/trace/python/pytorch_tracing_manager.h
@@ -0,0 +1,73 @@
+#pragma once
+#include <iostream>
+#include <string>
+#include <unordered_map>
+
+#include "../../../include/common/util.h"
+#include "pytorch_tracing.h"
+#include "pytorch_tracing_data.h"
+
+namespace systrace
+{
+namespace pytorch_tracing_manager
+{
+
+class PyTorchTracingManager // singleton holding one empty/ready pool pair per name
+{
+  public:
+    PyTorchTracingManager(const PyTorchTracingManager &) = delete;
+    PyTorchTracingManager &operator=(const PyTorchTracingManager &) = delete;
+    static void initSingleton(); // allocates instance_; invoked through std::call_once
+    static PyTorchTracingManager &getInstance();
+
+    PyTorchTracingDataArray *getEmptyPyTorchTracingDataArray(int name); // zeroed array from the empty pool
+    void returnPyTorchTracingDataArray(PyTorchTracingDataArray *, int,
+                                       int name); // middle argument selects PY_TRACING_READY_POOL / PY_TRACING_EMPTY_POOL
+    PyTorchTracingDataArray *getPyTorchTracingDataArray(int name); // array from the ready pool
+    PyTorchTracingDataArray *getCurPyTorchTracingDataArray(int name); // NOTE(review): no definition in pytorch_tracing_manager.cc — confirm
+
+  private:
+    PyTorchTracingManager() = default;
+    inline static PyTorchTracingManager *instance_ = nullptr;
+    inline static std::once_flag init_flag_; // NOTE(review): needs <mutex>, which this header does not include directly
+    struct Pool
+    {
+        util::resource::TimerPool<PyTorchTracingDataArray> empty_pool;
+        util::resource::TimerPool<PyTorchTracingDataArray> ready_pool;
+    };
+    std::unordered_map<int, Pool> pool_; // keyed by the `name` argument; entries are created by operator[]
+};
+} // namespace pytorch_tracing_manager
+} // namespace systrace
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+    PyTorchTracingDataArray *
+    systrace_get_empty_pytorch_tracing_data_array(int name)
+    { // C-linkage wrapper around getEmptyPyTorchTracingDataArray().
+        return systrace::pytorch_tracing_manager::PyTorchTracingManager::
+            getInstance()
+                .getEmptyPyTorchTracingDataArray(name);
+    } // NOTE(review): non-inline definition in a header; confirm only one translation unit includes it
+
+    PyTorchTracingDataArray *
+    systrace_get_full_pytorch_tracing_data_array(int name)
+    { // C-linkage wrapper around getPyTorchTracingDataArray() (ready pool).
+        return systrace::pytorch_tracing_manager::PyTorchTracingManager::
+            getInstance()
+                .getPyTorchTracingDataArray(name);
+    }
+
+    void
+    systrace_return_pytorch_tracing_data_array(PyTorchTracingDataArray *array,
+                                               int type, int name)
+    { // C-linkage wrapper around returnPyTorchTracingDataArray().
+        systrace::pytorch_tracing_manager::PyTorchTracingManager::getInstance()
+            .returnPyTorchTracingDataArray(array, type, name);
+    }
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/systrace/src/trace/systrace_manager.cc b/systrace/src/trace/systrace_manager.cc
new file mode 100644
index 0000000000000000000000000000000000000000..eef5606365b7e98465a732612ed575a82b767897
--- /dev/null
+++ b/systrace/src/trace/systrace_manager.cc
@@ -0,0 +1,243 @@
+#include <filesystem>
+#include <fstream>
+#include <memory>
+#include <vector>
+
+#include "../../include/common/constant.h"
+#include "../../include/common/shared_constants.h"
+#include "systrace_manager.h"
+
+int global_stage_id = 0;   // external-linkage global; not read in this file
+int global_stage_type = 0; // external-linkage global; not read in this file
+namespace systrace
+{
+
+namespace
+{
+constexpr uint64_t TRACE_INTERVAL = 100; // a dump is attempted every 100th poller iteration
+constexpr std::chrono::milliseconds POLL_INTERVAL(10); // sleep between poller iterations
+} // namespace
+
+PyTorchTrace &PyTorchTrace::getInstance()
+{ // init_flag_ lets only the first caller construct and initialize().
+    const auto build = []
+    {
+        instance_ = new PyTorchTrace();
+        instance_->initialize();
+    };
+    std::call_once(init_flag_, build);
+    return *instance_;
+}
+
+void PyTorchTrace::initialize()
+{
+    const auto &cfg = config::GlobalConfig::Instance();
+    pytorch_trace_.set_rank(cfg.rank);
+    STLOG(INFO) << "[PyTorchTrace] Rank set to: " << cfg.rank;
+
+    // Raw-owned loader object; nothing in this file deletes it.
+    pytorch_tracing_library_ =
+        new pytorch_tracing::PyTorchTracingLibrary("libsysTrace.so");
+    STLOG(INFO) << "[PyTorchTrace] Tracing library loaded";
+    registerTracingFunctions();
+}
+
+void PyTorchTrace::registerTracingFunctions()
+{ // Installs the fixed list of traced names and logs each Register() status.
+    pytorch_tracing_functions_ = {
+        "GC",
+        "torch.utils.data.dataloader@_BaseDataLoaderIter@__next__",
+        "torch_npu@npu@synchronize",
+        "torch_npu.npu@Event@synchronize",
+        "torch_npu.npu@Event@wait",
+        "torch_npu.npu@Stream@synchronize",
+        "torch_npu.npu@Stream@wait_event",
+        "torch_npu.npu@Stream@wait_stream",
+        "torch@autograd@backward",
+        "torch@autograd@grad",
+        "megatron.core.pipeline_parallel@schedules@forward_step",
+        "megatron.core.pipeline_parallel@schedules@backward_step"};
+    auto statuses =
+        pytorch_tracing_library_->Register(pytorch_tracing_functions_);
+    for (size_t idx = 0; idx != pytorch_tracing_functions_.size(); ++idx)
+    {
+        const std::string &fn_name = pytorch_tracing_functions_[idx];
+        STLOG(INFO) << "Registered function: " << fn_name
+                    << ", status: " << statuses[idx];
+    }
+}
+
+bool PyTorchTrace::triggerTrace() { return has_trigger_trace_.exchange(true); } // sets the flag and yields its previous value (false on the first call only)
+
+void PyTorchTrace::dumpPyTorchTracing()
+{
+    const std::string dump_dir(
+        constant::TorchTraceConstant::DEFAULT_TRACE_DUMP_PATH);
+    // NOTE(review): a truthy return is treated as failure here; confirm that
+    // CreateDirectoryIfNotExists really reports errors that way.
+    if (util::fs_utils::CreateDirectoryIfNotExists(dump_dir))
+    {
+        STLOG(ERROR) << "[PyTorchTrace] Failed to create dump directory";
+        return;
+    }
+
+    // Hold the trace lock while the message is refreshed and written out.
+    std::lock_guard<std::mutex> guard(trace_mutex_);
+    const auto &cfg = config::GlobalConfig::Instance();
+    pytorch_trace_.set_rank(cfg.local_rank);
+    pytorch_trace_.set_comm(cfg.job_name);
+
+    const size_t function_count = pytorch_tracing_functions_.size();
+    for (size_t index = 0; index < function_count; ++index)
+        processFunctionTracingData(index);
+    writeTraceToFile();
+}
+
+void PyTorchTrace::processFunctionTracingData(size_t function_index)
+{
+    // Buffers fetched from the tracing library for this function; each one
+    // is released back with PY_TRACING_EMPTY_POOL after conversion.
+    std::vector<PyTorchTracingDataArray *> buffers;
+    auto partial =
+        pytorch_tracing_library_->RetrievePartialTracingData(function_index);
+    if (partial)
+    {
+        buffers.push_back(partial);
+    }
+
+    while (auto full =
+               pytorch_tracing_library_->RetrieveAllTracingData(function_index))
+    {
+        buffers.push_back(full);
+    }
+
+    for (auto *buffer : buffers)
+    {
+        for (uint32_t slot = 0; slot < buffer->cur; ++slot)
+        {
+            const auto &record = buffer->data[slot];
+            if (record.start == 0)
+                continue; // records without a start timestamp are skipped
+
+            auto *stage = pytorch_trace_.add_pytorch_stages();
+            stage->set_start_us(record.start);
+            stage->set_end_us(record.end);
+            stage->set_stage_id(record.count);
+            stage->set_stage_type(pytorch_tracing_functions_[function_index]);
+
+            if (record.stack_depth > 0)
+            {
+                stage->mutable_stack_frames()->Reserve(record.stack_depth);
+                for (int depth = 0; depth < record.stack_depth; ++depth)
+                {
+                    // Frames whose first character is NUL are left out.
+                    if (record.stack_info[depth][0] == '\0')
+                        continue;
+                    stage->add_stack_frames(record.stack_info[depth]);
+                }
+            }
+
+            if (record.type == PAYLOAD_GC)
+            {
+                auto *gc = stage->mutable_gc_debug();
+                gc->set_collected(record.payload.gc_debug[0]);
+                gc->set_uncollectable(record.payload.gc_debug[1]);
+            }
+        }
+    }
+
+    for (auto *buffer : buffers)
+        pytorch_tracing_library_->ReleaseTracingData(
+            buffer, PY_TRACING_EMPTY_POOL, function_index);
+}
+
+void PyTorchTrace::writeTraceToFile()
+{
+    // Serialize before opening: std::ofstream creates/truncates the target,
+    // so a failed serialization must not leave an empty file behind.
+    std::string binary_data;
+    if (!pytorch_trace_.SerializeToString(&binary_data))
+    {
+        STLOG(ERROR) << "[PyTorchTrace] Failed to serialize trace data";
+        return;
+    }
+    const std::string file_path =
+        std::string(constant::TorchTraceConstant::DEFAULT_TRACE_DUMP_PATH) +
+        "/" + util::fs_utils::GenerateClusterUniqueFilename(".timeline");
+    std::ofstream file(file_path, std::ios::binary | std::ios::out);
+    if (!file)
+    {
+        STLOG(ERROR) << "[PyTorchTrace] Failed to open file: " << file_path;
+        return;
+    }
+    if (!(file << binary_data << std::flush)) // flush so write errors show up
+    {
+        STLOG(ERROR) << "[PyTorchTrace] Failed to write file: " << file_path;
+    }
+}
+
+SysTrace &SysTrace::getInstance()
+{ // The first caller constructs the object and runs initializeSystem().
+    const auto bootstrap = []
+    {
+        instance_ = new SysTrace();
+        instance_->initializeSystem();
+    };
+    std::call_once(init_flag_, bootstrap);
+    return *instance_;
+}
+
+SysTrace::~SysTrace() { stopEventPoller(); } // clears the run flag and joins the poller thread
+
+void SysTrace::initializeSystem()
+{
+    // Nothing is set up unless tracing is enabled in the global config.
+    if (config::GlobalConfig::Instance().enable)
+    {
+        systrace::util::InitializeSystemUtilities();
+        MSPTITracker::getInstance(); // called for its side effect only
+        PyTorchTrace::getInstance(); // likewise; the reference is unused
+        startEventPoller();
+    }
+}
+
+void SysTrace::startEventPoller()
+{
+    should_run_ = true; // set before the thread exists so its loop is entered
+    event_poller_ = std::thread(&SysTrace::eventPollerMain, this);
+#ifdef _GNU_SOURCE // only the thread-naming call is a GNU extension
+    pthread_setname_np(event_poller_.native_handle(), "systrace_poller");
+#endif
+    STLOG(INFO) << "[SysTrace] Event poller started";
+}
+
+void SysTrace::stopEventPoller()
+{
+    // Clear the run flag first so eventPollerMain() leaves its loop.
+    should_run_.store(false);
+    if (!event_poller_.joinable())
+        return; // no running thread to wait for
+    event_poller_.join();
+}
+
+void SysTrace::eventPollerMain()
+{
+    // A dump happens only when triggerTrace() reports true.
+    const auto dump_if_triggered = []
+    {
+        auto &tracer = PyTorchTrace::getInstance();
+        if (tracer.triggerTrace())
+            tracer.dumpPyTorchTracing();
+    };
+
+    while (should_run_.load())
+    {
+        const uint64_t iteration = loop_count_.fetch_add(1);
+        if (iteration % TRACE_INTERVAL == 0)
+            dump_if_triggered();
+        std::this_thread::sleep_for(POLL_INTERVAL);
+    }
+    dump_if_triggered(); // one more attempt once the loop has been stopped
+}
+
+} // namespace systrace
\ No newline at end of file
diff --git a/systrace/src/trace/systrace_manager.h b/systrace/src/trace/systrace_manager.h
new file mode 100644
index 0000000000000000000000000000000000000000..c043aba0b1985269e8eebc963d77e98b0403f06c
--- /dev/null
+++ b/systrace/src/trace/systrace_manager.h
@@ -0,0 +1,76 @@
+#pragma once
+#include <atomic>
+#include <mutex>
+#include <pthread.h>
+#include <thread>
+#include <vector>
+
+#include "../../include/common/logging.h"
+#include "../../include/common/util.h"
+#include "../../protos/systrace.pb.h"
+#include "../mspti/mspti_tracker.hpp"
+#include "library_loader.h"
+#include "python/pytorch_tracing_loader.h"
+
+namespace systrace
+{
+using namespace util;
+
+class PyTorchTrace // singleton that converts PyTorch tracing buffers into a protobuf dump
+{
+  public:
+    static PyTorchTrace &getInstance();
+
+    void dumpPyTorchTracing();
+    void dumpPyTorchTracing(bool incremental, bool async); // NOTE(review): no definition in systrace_manager.cc — confirm
+    bool triggerTrace(); // sets the trigger flag and returns its previous value
+
+    PyTorchTrace(const PyTorchTrace &) = delete;
+    PyTorchTrace &operator=(const PyTorchTrace &) = delete;
+
+  private:
+    PyTorchTrace() = default;
+    ~PyTorchTrace() = default;
+
+    void initialize(); // sets the rank, loads the tracing library, registers functions
+    void registerTracingFunctions();
+    void processFunctionTracingData(size_t function_index); // index into pytorch_tracing_functions_
+    void writeTraceToFile();
+
+    inline static PyTorchTrace *instance_ = nullptr;
+    inline static std::once_flag init_flag_;
+
+    Pytorch pytorch_trace_; // message filled by processFunctionTracingData()
+    std::atomic<bool> has_trigger_trace_{false};
+    std::mutex trace_mutex_; // locked by dumpPyTorchTracing()
+
+    std::vector<std::string> pytorch_tracing_functions_; // names passed to Register()
+    pytorch_tracing::PyTorchTracingLibrary *pytorch_tracing_library_; // assigned in initialize()
+};
+
+class SysTrace // singleton that boots tracing and runs the polling thread
+{
+  public:
+    static SysTrace &getInstance();
+
+    SysTrace(const SysTrace &) = delete;
+    SysTrace &operator=(const SysTrace &) = delete;
+
+  private:
+    SysTrace() = default;
+    ~SysTrace(); // joins the poller; NOTE(review): private, and no delete of instance_ is visible — confirm it ever runs
+
+    void initializeSystem(); // returns early unless GlobalConfig::enable is set
+    void startEventPoller();
+    void stopEventPoller();
+    void eventPollerMain(); // thread body: periodic dumpPyTorchTracing() attempts
+
+    inline static SysTrace *instance_ = nullptr;
+    inline static std::once_flag init_flag_;
+
+    std::atomic<bool> should_run_{true}; // loop condition of eventPollerMain()
+    std::atomic<uint64_t> loop_count_{0}; // incremented once per poller iteration
+    std::thread event_poller_;
+};
+
+} // namespace systrace
\ No newline at end of file
diff --git a/systrace/thirdparty/aarch64/libunwind/libunwind-aarch64.h b/systrace/thirdparty/aarch64/libunwind/libunwind-aarch64.h
new file mode 100644
index 0000000000000000000000000000000000000000..f794600637a80430269e472d312a04559dc34e1a
--- /dev/null
+++ b/systrace/thirdparty/aarch64/libunwind/libunwind-aarch64.h
@@ -0,0 +1,291 @@
+/* libunwind - a platform-independent unwind library
+   Copyright (C) 2001-2004 Hewlett-Packard Co
+        Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+   Copyright (C) 2013 Linaro Limited
+   Copyright 2022 Blackberry Limited
+
+This file is part of libunwind.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#ifndef LIBUNWIND_H
+#define LIBUNWIND_H
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C"
+{
+#endif
+
+#include <inttypes.h>
+#include <stdalign.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <ucontext.h>
+
+#ifndef UNW_EMPTY_STRUCT
+#define UNW_EMPTY_STRUCT uint8_t unused;
+#endif
+
+#define UNW_TARGET aarch64
+#define UNW_TARGET_AARCH64 1
+
+#define _U_TDEP_QP_TRUE 0 /* see libunwind-dynamic.h  */
+
+    /* This needs to be big enough to accommodate "struct cursor", while
+       leaving some slack for future expansion.  Changing this value will
+       require recompiling all users of this library.  Stack allocation is
+       relatively cheap and unwind-state copying is relatively rare, so we
+       want to err on making it rather too big than too small.
+
+       Calculation is regs used (64 + 34) * 2 + 40 (bytes of rest of
+       cursor) + padding
+    */
+
+#define UNW_TDEP_CURSOR_LEN 250
+
+    typedef uint64_t unw_word_t;
+    typedef int64_t unw_sword_t;
+
+    typedef long double unw_tdep_fpreg_t;
+
+#define UNW_WORD_MAX UINT64_MAX
+
+    typedef struct
+    {
+        /* no aarch64-specific auxiliary proc-info */
+        UNW_EMPTY_STRUCT
+    } unw_tdep_proc_info_t; /* embedded as the `extra` member of unw_proc_info_t */
+
+    typedef enum /* X0..X30 take the values 0..30 implicitly */
+    {
+        /* 64-bit general registers.  */
+        UNW_AARCH64_X0,
+        UNW_AARCH64_X1,
+        UNW_AARCH64_X2,
+        UNW_AARCH64_X3,
+        UNW_AARCH64_X4,
+        UNW_AARCH64_X5,
+        UNW_AARCH64_X6,
+        UNW_AARCH64_X7,
+        UNW_AARCH64_X8,
+
+        /* Temporary registers.  */
+        UNW_AARCH64_X9,
+        UNW_AARCH64_X10,
+        UNW_AARCH64_X11,
+        UNW_AARCH64_X12,
+        UNW_AARCH64_X13,
+        UNW_AARCH64_X14,
+        UNW_AARCH64_X15,
+
+        /* Intra-procedure-call temporary registers.  */
+        UNW_AARCH64_X16,
+        UNW_AARCH64_X17,
+
+        /* Callee-saved registers.  */
+        UNW_AARCH64_X18,
+        UNW_AARCH64_X19,
+        UNW_AARCH64_X20,
+        UNW_AARCH64_X21,
+        UNW_AARCH64_X22,
+        UNW_AARCH64_X23,
+        UNW_AARCH64_X24,
+        UNW_AARCH64_X25,
+        UNW_AARCH64_X26,
+        UNW_AARCH64_X27,
+        UNW_AARCH64_X28,
+
+        /* 64-bit frame pointer.  */
+        UNW_AARCH64_X29,
+
+        /* 64-bit link register.  */
+        UNW_AARCH64_X30,
+
+        /* 64-bit stack pointer.  */
+        UNW_AARCH64_SP = 31,
+        UNW_AARCH64_PC,     /* 32 */
+        UNW_AARCH64_PSTATE, /* 33 */
+
+        /* Pseudo-register */
+        UNW_AARCH64_RA_SIGN_STATE = 34,
+
+        /* SVE Vector Granule pseudo register */
+        UNW_AARCH64_VG = 46,
+
+        /* 128-bit FP/Advanced SIMD registers.  */
+        UNW_AARCH64_V0 = 64,
+        UNW_AARCH64_V1,
+        UNW_AARCH64_V2,
+        UNW_AARCH64_V3,
+        UNW_AARCH64_V4,
+        UNW_AARCH64_V5,
+        UNW_AARCH64_V6,
+        UNW_AARCH64_V7,
+        UNW_AARCH64_V8,
+        UNW_AARCH64_V9,
+        UNW_AARCH64_V10,
+        UNW_AARCH64_V11,
+        UNW_AARCH64_V12,
+        UNW_AARCH64_V13,
+        UNW_AARCH64_V14,
+        UNW_AARCH64_V15,
+        UNW_AARCH64_V16,
+        UNW_AARCH64_V17,
+        UNW_AARCH64_V18,
+        UNW_AARCH64_V19,
+        UNW_AARCH64_V20,
+        UNW_AARCH64_V21,
+        UNW_AARCH64_V22,
+        UNW_AARCH64_V23,
+        UNW_AARCH64_V24,
+        UNW_AARCH64_V25,
+        UNW_AARCH64_V26,
+        UNW_AARCH64_V27,
+        UNW_AARCH64_V28,
+        UNW_AARCH64_V29,
+        UNW_AARCH64_V30,
+        UNW_AARCH64_V31,
+
+        UNW_AARCH64_FPSR, /* 96 */
+        UNW_AARCH64_FPCR, /* 97 */
+
+        /* For AArch64, the CFA is the value of SP (x31) at the call site of the
+           previous frame.  */
+        UNW_AARCH64_CFA = UNW_AARCH64_SP,
+
+        UNW_TDEP_LAST_REG = UNW_AARCH64_FPCR,
+
+        UNW_TDEP_IP = UNW_AARCH64_X30, /* generic IP alias maps to the link register */
+        UNW_TDEP_SP = UNW_AARCH64_SP,
+        UNW_TDEP_EH = UNW_AARCH64_X0
+
+    } aarch64_regnum_t;
+
+/* Use R0 through R3 to pass exception handling information.  */
+#define UNW_TDEP_NUM_EH_REGS 4
+
+    typedef struct unw_tdep_save_loc
+    {
+        /* Additional target-dependent info on a save location.  */
+        UNW_EMPTY_STRUCT
+    } unw_tdep_save_loc_t; /* embedded as the `extra` member of unw_save_loc_t */
+
+#ifdef __linux__
+    /* On AArch64, we can directly use ucontext_t as the unwind context,
+     * however, the __reserved struct is quite large: tune it down to only
+     * the necessary used fields.  */
+
+    struct unw_sigcontext
+    {
+        uint64_t fault_address;
+        uint64_t regs[31]; /* x0..x30, as stored by unw_tdep_getcontext() */
+        uint64_t sp;       /* byte offset 248 from regs */
+        uint64_t pc;       /* byte offset 256 from regs */
+        uint64_t pstate;
+        alignas(16) uint8_t __reserved[(66 * 8)]; /* 528 bytes, 16-byte aligned */
+    };
+
+    typedef struct /* Linux unwind context built around the trimmed unw_sigcontext */
+    {
+        unsigned long uc_flags;
+        struct ucontext *uc_link;
+        stack_t uc_stack;
+#ifndef __ANDROID__
+        sigset_t uc_sigmask;
+#else
+        union
+        {
+            sigset_t uc_sigmask;
+            sigset64_t uc_sigmask64;
+        };
+        char __padding[128 - sizeof(sigset_t)]; /* Android only: padding after the signal-mask union */
+#endif
+        struct unw_sigcontext uc_mcontext; /* register block that UNW_BASE points x0 at */
+    } unw_tdep_context_t;
+
+    typedef struct /* FP/SIMD state record with a magic/size header */
+    {
+        uint32_t _ctx_magic;
+        uint32_t _ctx_size;
+        uint32_t fpsr;
+        uint32_t fpcr;
+        uint64_t vregs[64]; /* 64 x 64 bits; presumably 32 128-bit V registers — confirm */
+    } unw_fpsimd_context_t;
+#else
+/* On AArch64, we can directly use ucontext_t as the unwind context.  */
+typedef ucontext_t unw_tdep_context_t;
+#endif
+
+#include "libunwind-common.h"
+#include "libunwind-dynamic.h"
+
+#if defined(__FreeBSD__) /* UNW_BASE pins x0 to the context's general-register array */
+#define UNW_BASE                                                               \
+    register uint64_t unw_base __asm__("x0") =                                 \
+        (uint64_t)unw_ctx->uc_mcontext.mc_gpregs.gp_x;
+#elif defined(__QNX__)
+#define UNW_BASE                                                               \
+    register uint64_t unw_base __asm__("x0") =                                 \
+        (uint64_t)unw_ctx->uc_mcontext.cpu.gpr;
+#else /* every other platform: the array is uc_mcontext.regs */
+#define UNW_BASE                                                               \
+    register uint64_t unw_base __asm__("x0") =                                 \
+        (uint64_t)unw_ctx->uc_mcontext.regs;
+#endif
+
+#define unw_tdep_getcontext(uc) /* stores x0-x30, sp, resume pc; value is 0 */ \
+    ({                                                                         \
+        unw_tdep_context_t *unw_ctx = (uc);                                    \
+        UNW_BASE                                                               \
+        __asm__ __volatile__("stp x0, x1, [%[base], #0]\n"                     \
+                             "stp x2, x3, [%[base], #16]\n"                    \
+                             "stp x4, x5, [%[base], #32]\n"                    \
+                             "stp x6, x7, [%[base], #48]\n"                    \
+                             "stp x8, x9, [%[base], #64]\n"                    \
+                             "stp x10, x11, [%[base], #80]\n"                  \
+                             "stp x12, x13, [%[base], #96]\n"                  \
+                             "stp x14, x15, [%[base], #112]\n"                 \
+                             "stp x16, x17, [%[base], #128]\n"                 \
+                             "stp x18, x19, [%[base], #144]\n"                 \
+                             "stp x20, x21, [%[base], #160]\n"                 \
+                             "stp x22, x23, [%[base], #176]\n"                 \
+                             "stp x24, x25, [%[base], #192]\n"                 \
+                             "stp x26, x27, [%[base], #208]\n"                 \
+                             "stp x28, x29, [%[base], #224]\n"                 \
+                             "mov x1, sp\n"                                    \
+                             "stp x30, x1, [%[base], #240]\n"                  \
+                             "adr x1, ret%=\n"                                 \
+                             "str x1, [%[base], #256]\n"                       \
+                             "mov %[base], #0\n"                               \
+                             "ret%=:\n"                                        \
+                             : [base] "+r"(unw_base)                           \
+                             :                                                 \
+                             : "x1", "memory");                                \
+        (int)unw_base;                                                         \
+    })
+#define unw_tdep_is_fpreg UNW_ARCH_OBJ(is_fpreg)
+
+    extern int unw_tdep_is_fpreg(int);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* LIBUNWIND_H */
diff --git a/systrace/thirdparty/aarch64/libunwind/libunwind-common.h b/systrace/thirdparty/aarch64/libunwind/libunwind-common.h
new file mode 100644
index 0000000000000000000000000000000000000000..9c0db22b11df3075b718e08ceb5c89f6d4df57b0
--- /dev/null
+++ b/systrace/thirdparty/aarch64/libunwind/libunwind-common.h
@@ -0,0 +1,335 @@
+/* libunwind - a platform-independent unwind library
+   Copyright (C) 2001-2004 Hewlett-Packard Co
+    Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+
+This file is part of libunwind.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#define UNW_VERSION_MAJOR 1
+#define UNW_VERSION_MINOR 9
+#define UNW_VERSION_EXTRA -pre
+
+#define UNW_VERSION_CODE(maj, min) (((maj) << 16) | (min))
+#define UNW_VERSION UNW_VERSION_CODE(UNW_VERSION_MAJOR, UNW_VERSION_MINOR)
+
+#ifdef __sun
+// On SmartOS, gcc fails with the following error:
+//
+// ../include/libunwind-common.h:43:41: error: expected identifier or '(' before
+// numeric constant # define UNW_PREFIX UNW_PASTE(UNW_PASTE(_U,UNW_TARGET),_)
+//                                         ^
+//
+// workaround is to undefine _U explicitly.
+// see https://github.com/libunwind/libunwind/issues/118 for more details.
+//
+#undef _U
+#endif
+
+#define UNW_PASTE2(x, y) x##y
+#define UNW_PASTE(x, y) UNW_PASTE2(x, y)
+#define UNW_OBJ(fn) UNW_PASTE(UNW_PREFIX, fn)
+#define UNW_ARCH_OBJ(fn) UNW_PASTE(UNW_PASTE(UNW_PASTE(_U, UNW_TARGET), _), fn)
+
+#ifdef UNW_LOCAL_ONLY
+#define UNW_PREFIX UNW_PASTE(UNW_PASTE(_UL, UNW_TARGET), _)
+#else /* !UNW_LOCAL_ONLY */
+#define UNW_PREFIX UNW_PASTE(UNW_PASTE(_U, UNW_TARGET), _)
+#endif /* !UNW_LOCAL_ONLY */
+
+/* Error codes.  The unwind routines return the *negated* values of
+   these error codes on error and a non-negative value on success.  */
+typedef enum /* positive codes; failures are reported negated, e.g. -UNW_ENOMEM */
+{
+    UNW_ESUCCESS = 0, /* no error */
+    UNW_EUNSPEC,      /* unspecified (general) error */
+    UNW_ENOMEM,       /* out of memory */
+    UNW_EBADREG,      /* bad register number */
+    UNW_EREADONLYREG, /* attempt to write read-only register */
+    UNW_ESTOPUNWIND,  /* stop unwinding */
+    UNW_EINVALIDIP,   /* invalid IP */
+    UNW_EBADFRAME,    /* bad frame */
+    UNW_EINVAL,       /* unsupported operation or bad value */
+    UNW_EBADVERSION,  /* unwind info has unsupported version */
+    UNW_ENOINFO       /* no unwind info found */
+} unw_error_t;
+
+/* The following enum defines the indices for a couple of
+   (pseudo-)registers which have the same meaning across all
+   platforms.  (RO) means read-only.  (RW) means read-write.  General
+   registers (aka "integer registers") are expected to start with
+   index 0.  The number of such registers is architecture-dependent.
+   The remaining indices can be used as an architecture sees fit.  The
+   last valid register index is given by UNW_REG_LAST.  */
+typedef enum /* every value is an alias of a UNW_TDEP_* constant from the target header */
+{
+    UNW_REG_IP = UNW_TDEP_IP, /* (rw) instruction pointer (pc) */
+    UNW_REG_SP = UNW_TDEP_SP, /* (ro) stack pointer */
+    UNW_REG_EH = UNW_TDEP_EH, /* (rw) exception-handling reg base */
+    UNW_REG_LAST = UNW_TDEP_LAST_REG /* last valid register index */
+} unw_frame_regnum_t;
+
+/* Number of exception-handler argument registers: */
+#define UNW_NUM_EH_REGS UNW_TDEP_NUM_EH_REGS
+
+typedef enum /* selectable caching modes */
+{
+    UNW_CACHE_NONE,      /* no caching */
+    UNW_CACHE_GLOBAL,    /* shared global cache */
+    UNW_CACHE_PER_THREAD /* per-thread caching */
+} unw_caching_policy_t;
+
+typedef enum /* presumably the flag argument of unw_init_local2() — confirm */
+{
+    UNW_INIT_SIGNAL_FRAME = 1 /* We know this is a signal frame */
+} unw_init_local2_flags_t;
+
+typedef int unw_regnum_t;
+
+/* The unwind cursor starts at the youngest (most deeply nested) frame
+   and is used to track the frame state as the unwinder steps from
+   frame to frame.  It is safe to make (shallow) copies of variables
+   of this type.  */
+typedef struct unw_cursor
+{
+    unw_word_t opaque[UNW_TDEP_CURSOR_LEN]; /* length comes from the target header */
+} unw_cursor_t;
+
+/* This type encapsulates the entire (preserved) machine-state.  */
+typedef unw_tdep_context_t unw_context_t;
+
+/* unw_getcontext() fills the unw_context_t pointed to by UC with the
+   machine state as it exists at the call-site.  For implementation
+   reasons, this needs to be a target-dependent macro.  It's easiest
+   to think of unw_getcontext() as being identical to getcontext(). */
+#define unw_getcontext(uc) unw_tdep_getcontext(uc)
+
+/* Return 1 if register number R is a floating-point register, zero
+   otherwise.
+   This routine is signal-safe.  */
+#define unw_is_fpreg(r) unw_tdep_is_fpreg(r)
+
+typedef unw_tdep_fpreg_t unw_fpreg_t;
+
+typedef struct unw_addr_space *unw_addr_space_t;
+
+/* Each target may define its own set of flags, but bits 0-15 are
+   reserved for general libunwind-use.  */
+#define UNW_PI_FLAG_FIRST_TDEP_BIT 16
+/* The information comes from a .debug_frame section.  */
+#define UNW_PI_FLAG_DEBUG_FRAME 32
+
+typedef struct unw_proc_info /* per-procedure unwind description */
+{
+    unw_word_t start_ip; /* first IP covered by this procedure */
+    unw_word_t end_ip;   /* first IP NOT covered by this procedure */
+#if defined(NEED_LAST_IP)
+    unw_word_t last_ip; /* first IP that could begin another procedure */
+#endif
+    unw_word_t lsda;    /* address of lang.-spec. data area (if any) */
+    unw_word_t handler; /* optional personality routine */
+    unw_word_t gp;      /* global-pointer value for this procedure */
+    unw_word_t flags;   /* misc. flags */
+
+    int format;                 /* unwind-info format (arch-specific) */
+    int unwind_info_size;       /* size of the information (if applicable) */
+    void *unwind_info;          /* unwind-info (arch-specific) */
+    unw_tdep_proc_info_t extra; /* target-dependent auxiliary proc-info */
+} unw_proc_info_t;
+
+typedef int (*unw_reg_states_callback)(void *token, void *reg_states_data,
+                                       size_t reg_states_data_size,
+                                       unw_word_t start_ip, unw_word_t end_ip);
+
+/* These are backend callback routines that provide access to the
+   state of a "remote" process.  This can be used, for example, to
+   unwind another process through the ptrace() interface.  */
+typedef struct unw_accessors
+{
+    /* Look up the unwind info associated with instruction-pointer IP.
+       On success, the routine fills in the PROC_INFO structure.  */
+    int (*find_proc_info)(unw_addr_space_t, unw_word_t, unw_proc_info_t *, int,
+                          void *);
+
+    /* Release any resources (e.g., memory) that were allocated for
+       the unwind info returned by a previous call to
+       find_proc_info() with NEED_UNWIND_INFO set to 1.  */
+    void (*put_unwind_info)(unw_addr_space_t, unw_proc_info_t *, void *);
+
+    /* Return the list-head of the dynamically registered unwind
+       info.  */
+    int (*get_dyn_info_list_addr)(unw_addr_space_t, unw_word_t *, void *);
+
+    /* Access aligned word at address ADDR.  The value is returned
+       according to the endianness of the host (e.g., if the host is
+       little-endian and the target is big-endian, access_mem() needs
+       to byte-swap the value before returning it).  */
+    int (*access_mem)(unw_addr_space_t, unw_word_t, unw_word_t *, int, void *);
+
+    /* Access register number REG at address ADDR.  */
+    int (*access_reg)(unw_addr_space_t, unw_regnum_t, unw_word_t *, int,
+                      void *);
+
+    /* Access floating-point register number REG at address ADDR.  */
+    int (*access_fpreg)(unw_addr_space_t, unw_regnum_t, unw_fpreg_t *, int,
+                        void *);
+
+    int (*resume)(unw_addr_space_t, unw_cursor_t *, void *);
+
+    /* Optional call back to obtain the name of a (static) procedure.
+       Dynamically generated procedures are handled automatically by
+       libunwind.  This callback is optional and may be set to
+       NULL.  */
+    int (*get_proc_name)(unw_addr_space_t, unw_word_t, char *, size_t,
+                         unw_word_t *, void *);
+
+    /* Optional call back to obtain the name of the ELF file the ip belongs
+       to. This callback is optional and may be set to NULL.  */
+    int (*get_elf_filename)(unw_addr_space_t, unw_word_t, char *, size_t,
+                            unw_word_t *, void *);
+
+    /* Optional call back to obtain the start and end ip of a procedure.
+     * procedure ip range is [start, end), the range is without end.
+     * This callback is optional and may be set to NULL.
+     */
+    int (*get_proc_ip_range)(unw_addr_space_t, unw_word_t, unw_word_t *,
+                             unw_word_t *, void *);
+
+    /* Optional call back to return a mask to be used with pointer
+     * authentication on arm64.
+     *
+     * The on bits in the returned mask indicate which bits in a return address
+     * are part of a pointer authentication code.  These are the bits in the
+     * return address to turn off so that the calling frame can be found
+     * for the unwinding to continue.
+     *
+     * The return value must be host-endian.  e.g. if the target is big-endian
+     * and the host is little endian, the implementation of this function
+     * must byte swap.
+     *
+     * This callback is optional and may be set to NULL.  In this case all
+     * the bits in the return address are used, as if no masking were done.
+     */
+    unw_word_t (*ptrauth_insn_mask)(unw_addr_space_t, void *);
+
+} unw_accessors_t;
+
+typedef enum unw_save_loc_type
+{
+    UNW_SLT_NONE,   /* register is not saved ("not an l-value") */
+    UNW_SLT_MEMORY, /* register has been saved in memory */
+    UNW_SLT_REG     /* register has been saved in (another) register */
+} unw_save_loc_type_t;
+
+typedef struct unw_save_loc
+{
+    unw_save_loc_type_t type; /* selects which member of u is valid */
+    union
+    {
+        unw_word_t addr;     /* valid if type==UNW_SLT_MEMORY */
+        unw_regnum_t regnum; /* valid if type==UNW_SLT_REG */
+    } u; /* neither member is marked valid for UNW_SLT_NONE */
+    unw_tdep_save_loc_t extra; /* target-dependent additional information */
+} unw_save_loc_t;
+
+struct dl_phdr_info;
+typedef int (*unw_iterate_phdr_callback_t)(struct dl_phdr_info *, size_t,
+                                           void *);
+typedef int (*unw_iterate_phdr_func_t)(unw_iterate_phdr_callback_t, void *);
+
+/* These routines work both for local and remote unwinding.  */
+
+#define unw_local_addr_space UNW_OBJ(local_addr_space)
+#define unw_create_addr_space UNW_OBJ(create_addr_space)
+#define unw_destroy_addr_space UNW_OBJ(destroy_addr_space)
+#define unw_get_accessors UNW_ARCH_OBJ(get_accessors)
+#define unw_get_accessors_int UNW_ARCH_OBJ(get_accessors_int)
+#define unw_init_local UNW_OBJ(init_local)
+#define unw_init_local2 UNW_OBJ(init_local2)
+#define unw_init_remote UNW_OBJ(init_remote)
+#define unw_step UNW_OBJ(step)
+#define unw_resume UNW_OBJ(resume)
+#define unw_get_proc_info UNW_OBJ(get_proc_info)
+#define unw_get_proc_info_by_ip UNW_OBJ(get_proc_info_by_ip)
+#define unw_get_proc_info_in_range UNW_OBJ(get_proc_info_in_range)
+#define unw_reg_states_iterate UNW_OBJ(reg_states_iterate)
+#define unw_apply_reg_state UNW_OBJ(apply_reg_state)
+#define unw_get_reg UNW_OBJ(get_reg)
+#define unw_set_reg UNW_OBJ(set_reg)
+#define unw_get_fpreg UNW_OBJ(get_fpreg)
+#define unw_set_fpreg UNW_OBJ(set_fpreg)
+#define unw_get_save_loc UNW_OBJ(get_save_loc)
+#define unw_is_signal_frame UNW_OBJ(is_signal_frame)
+#define unw_is_plt_entry UNW_OBJ(is_plt_entry)
+#define unw_get_proc_name UNW_OBJ(get_proc_name)
+#define unw_get_proc_name_by_ip UNW_OBJ(get_proc_name_by_ip)
+#define unw_get_elf_filename UNW_OBJ(get_elf_filename)
+#define unw_get_elf_filename_by_ip UNW_OBJ(get_elf_filename_by_ip)
+#define unw_set_caching_policy UNW_OBJ(set_caching_policy)
+#define unw_set_cache_size UNW_OBJ(set_cache_size)
+#define unw_set_iterate_phdr_function UNW_OBJ(set_iterate_phdr_function)
+#define unw_regname UNW_ARCH_OBJ(regname)
+#define unw_flush_cache UNW_ARCH_OBJ(flush_cache)
+#define unw_strerror UNW_ARCH_OBJ(strerror)
+
+extern unw_addr_space_t unw_create_addr_space(unw_accessors_t *, int);
+extern void unw_destroy_addr_space(unw_addr_space_t);
+extern unw_accessors_t *unw_get_accessors(unw_addr_space_t);
+extern unw_accessors_t *unw_get_accessors_int(unw_addr_space_t);
+extern void unw_flush_cache(unw_addr_space_t, unw_word_t, unw_word_t);
+extern int unw_set_caching_policy(unw_addr_space_t, unw_caching_policy_t);
+extern int unw_set_cache_size(unw_addr_space_t, size_t, int);
+extern void unw_set_iterate_phdr_function(unw_addr_space_t,
+                                          unw_iterate_phdr_func_t);
+extern const char *unw_regname(unw_regnum_t);
+
+extern int unw_init_local(unw_cursor_t *, unw_context_t *);
+extern int unw_init_local2(unw_cursor_t *, unw_context_t *, int);
+extern int unw_init_remote(unw_cursor_t *, unw_addr_space_t, void *);
+extern int unw_step(unw_cursor_t *);
+extern int unw_resume(unw_cursor_t *);
+extern int unw_get_proc_info(unw_cursor_t *, unw_proc_info_t *);
+extern int unw_get_proc_info_by_ip(unw_addr_space_t, unw_word_t,
+                                   unw_proc_info_t *, void *);
+extern int unw_get_proc_info_in_range(unw_word_t, unw_word_t, unw_word_t,
+                                      unw_word_t, unw_word_t, unw_word_t,
+                                      unw_addr_space_t, unw_word_t,
+                                      unw_proc_info_t *, int, void *);
+extern int unw_reg_states_iterate(unw_cursor_t *, unw_reg_states_callback,
+                                  void *);
+extern int unw_apply_reg_state(unw_cursor_t *, void *);
+extern int unw_get_reg(unw_cursor_t *, int, unw_word_t *);
+extern int unw_set_reg(unw_cursor_t *, int, unw_word_t);
+extern int unw_get_fpreg(unw_cursor_t *, int, unw_fpreg_t *);
+extern int unw_set_fpreg(unw_cursor_t *, int, unw_fpreg_t);
+extern int unw_get_save_loc(unw_cursor_t *, int, unw_save_loc_t *);
+extern int unw_is_signal_frame(unw_cursor_t *);
+extern int unw_is_plt_entry(unw_cursor_t *);
+extern int unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *);
+extern int unw_get_proc_name_by_ip(unw_addr_space_t, unw_word_t, char *, size_t,
+                                   unw_word_t *, void *);
+extern int unw_get_elf_filename(unw_cursor_t *, char *, size_t, unw_word_t *);
+extern int unw_get_elf_filename_by_ip(unw_addr_space_t, unw_word_t, char *,
+                                      size_t, unw_word_t *, void *);
+extern const char *unw_strerror(int);
+extern int unw_backtrace(void **, int);
+extern int unw_backtrace2(void **, int, unw_context_t *, int);
+
+extern unw_addr_space_t unw_local_addr_space;
diff --git a/systrace/thirdparty/aarch64/libunwind/libunwind-dynamic.h b/systrace/thirdparty/aarch64/libunwind/libunwind-dynamic.h
new file mode 100644
index 0000000000000000000000000000000000000000..13caf1633631ccc33d6c90ace394c539dd03f124
--- /dev/null
+++ b/systrace/thirdparty/aarch64/libunwind/libunwind-dynamic.h
@@ -0,0 +1,201 @@
+/* libunwind - a platform-independent unwind library
+   Copyright (C) 2002-2004 Hewlett-Packard Co
+        Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+
+This file is part of libunwind.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+/* This file defines the runtime-support routines for dynamically
+generated code.  Even though it is implemented as part of libunwind,
+it is logically separate from the interface to perform the actual
+unwinding.  In particular, this interface is always used in the
+context of the unwind target, whereas the rest of the unwind API is
+used in context of the process that is doing the unwind (which may be
+a debugger running on another machine, for example).
+
+Note that the data-structures declared here serve a dual purpose:
+when a program registers a dynamically generated procedure, it uses
+these structures directly.  On the other hand, with remote-unwinding,
+the data-structures are read from the remote process's memory and
+translated into internalized versions.  To facilitate remote-access,
+the following rules should be followed in declaring these structures:
+
+ (1) Declare a member as a pointer only if the information the
+     member points to needs to be internalized as well (e.g., a
+     string representing a procedure name should be declared as
+     "const char *", but the instruction pointer should be declared
+     as unw_word_t).
+
+ (2) Provide sufficient padding to ensure that no implicit padding
+     will be needed on any of the supported target architectures.  For
+     the time being, padding data structures with the assumption that
+     sizeof (unw_word_t) == 8 should be sufficient.  (Note: it's not
+     impossible to internalize structures with internal padding, but
+     it does make the process a bit harder).
+
+ (3) Don't declare members that contain bitfields or floating-point
+     values.
+
+ (4) Don't declare members with enumeration types.  Declare them as
+     int32_t instead.  */
+
+typedef enum
+{
+    UNW_DYN_STOP = 0,     /* end-of-unwind-info marker */
+    UNW_DYN_SAVE_REG,     /* save register to another register */
+    UNW_DYN_SPILL_FP_REL, /* frame-pointer-relative register spill */
+    UNW_DYN_SPILL_SP_REL, /* stack-pointer-relative register spill */
+    UNW_DYN_ADD,          /* add constant value to a register */
+    UNW_DYN_POP_FRAMES,   /* drop one or more stack frames */
+    UNW_DYN_LABEL_STATE,  /* name the current state */
+    UNW_DYN_COPY_STATE,   /* set the region's entry-state */
+    UNW_DYN_ALIAS         /* get unwind info from an alias */
+} unw_dyn_operation_t;
+
+typedef enum
+{
+    UNW_INFO_FORMAT_DYNAMIC,      /* unw_dyn_proc_info_t */
+    UNW_INFO_FORMAT_TABLE,        /* unw_dyn_table_info_t */
+    UNW_INFO_FORMAT_REMOTE_TABLE, /* unw_dyn_remote_table_info_t */
+    UNW_INFO_FORMAT_ARM_EXIDX,    /* ARM specific unwind info */
+    UNW_INFO_FORMAT_IP_OFFSET     /* Like UNW_INFO_FORMAT_REMOTE_TABLE, but
+                                     table entries are considered
+                                     relative to di->start_ip, rather
+                                     than di->segbase */
+} unw_dyn_info_format_t;
+
+typedef struct unw_dyn_op
+{
+    int8_t tag;     /* what operation? (a UNW_DYN_* value) */
+    int8_t qp;      /* qualifying predicate register */
+    int16_t reg;    /* what register */
+    int32_t when;   /* when does it take effect? */
+    unw_word_t val; /* auxiliary value */
+} unw_dyn_op_t;
+
+typedef struct unw_dyn_region_info
+{
+    struct unw_dyn_region_info *next; /* linked list of regions */
+    int32_t insn_count;               /* region length (# of instructions) */
+    uint32_t op_count;                /* length of op-array */
+    unw_dyn_op_t op[1];               /* variable-length array of op_count ops */
+} unw_dyn_region_info_t;
+
+typedef struct unw_dyn_proc_info
+{
+    unw_word_t name_ptr; /* address of human-readable procedure name */
+    unw_word_t handler;  /* address of personality routine */
+    uint32_t flags;
+    int32_t pad0;
+    unw_dyn_region_info_t *regions;
+} unw_dyn_proc_info_t;
+
+typedef struct unw_dyn_table_info
+{
+    unw_word_t name_ptr;  /* addr. of table name (e.g., library name) */
+    unw_word_t segbase;   /* segment base */
+    unw_word_t table_len; /* must be a multiple of sizeof(unw_word_t)! */
+    unw_word_t *table_data;
+} unw_dyn_table_info_t;
+
+typedef struct unw_dyn_remote_table_info
+{
+    unw_word_t name_ptr;  /* addr. of table name (e.g., library name) */
+    unw_word_t segbase;   /* segment base */
+    unw_word_t table_len; /* must be a multiple of sizeof(unw_word_t)! */
+    unw_word_t table_data;
+} unw_dyn_remote_table_info_t;
+
+typedef struct unw_dyn_info
+{
+    /* doubly-linked list of dyn-info structures: */
+    struct unw_dyn_info *next;
+    struct unw_dyn_info *prev;
+    unw_word_t start_ip; /* first IP covered by this entry */
+    unw_word_t end_ip;   /* first IP NOT covered by this entry */
+    unw_word_t gp;       /* global-pointer in effect for this entry */
+    int32_t format;      /* real type: unw_dyn_info_format_t */
+    int32_t pad; /* explicit padding following the 32-bit format field */
+    unw_word_t load_offset; /* ELF load offset */
+    union
+    {
+        unw_dyn_proc_info_t pi; /* UNW_INFO_FORMAT_DYNAMIC */
+        unw_dyn_table_info_t ti; /* UNW_INFO_FORMAT_TABLE */
+        unw_dyn_remote_table_info_t rti; /* UNW_INFO_FORMAT_REMOTE_TABLE */
+    } u; /* presumably selected by format -- confirm against libunwind */
+} unw_dyn_info_t;
+
+typedef struct unw_dyn_info_list
+{
+    uint32_t version;
+    uint32_t generation;
+    unw_dyn_info_t *first;
+} unw_dyn_info_list_t;
+
+/* Return the size (in bytes) of an unw_dyn_region_info_t structure that can
+   hold OP_COUNT ops (offset of op[OP_COUNT], computed from a NULL base).  */
+#define _U_dyn_region_info_size(op_count)                                      \
+    ((char *)(((unw_dyn_region_info_t *)NULL)->op + (op_count)) - (char *)NULL)
+
+/* Register the unwind info for a single procedure.
+   This routine is NOT signal-safe.  */
+extern void _U_dyn_register(unw_dyn_info_t *);
+
+/* Cancel the unwind info for a single procedure.
+   This routine is NOT signal-safe.  */
+extern void _U_dyn_cancel(unw_dyn_info_t *);
+
+/* Convenience routines.  NB: _U_dyn_op takes WHEN before REG.  */
+
+#define _U_dyn_op(_tag, _qp, _when, _reg, _val)                                \
+    ((unw_dyn_op_t){(_tag), (_qp), (_reg), (_when), (_val)})
+
+#define _U_dyn_op_save_reg(op, qp, when, reg, dst)                             \
+    (*(op) = _U_dyn_op(UNW_DYN_SAVE_REG, (qp), (when), (reg), (dst)))
+
+#define _U_dyn_op_spill_fp_rel(op, qp, when, reg, offset)                      \
+    (*(op) = _U_dyn_op(UNW_DYN_SPILL_FP_REL, (qp), (when), (reg), (offset)))
+
+#define _U_dyn_op_spill_sp_rel(op, qp, when, reg, offset)                      \
+    (*(op) = _U_dyn_op(UNW_DYN_SPILL_SP_REL, (qp), (when), (reg), (offset)))
+
+#define _U_dyn_op_add(op, qp, when, reg, value)                                \
+    (*(op) = _U_dyn_op(UNW_DYN_ADD, (qp), (when), (reg), (value)))
+
+#define _U_dyn_op_pop_frames(op, qp, when, num_frames)                         \
+    (*(op) = _U_dyn_op(UNW_DYN_POP_FRAMES, (qp), (when), 0, (num_frames)))
+
+#define _U_dyn_op_label_state(op, label)                                       \
+    (*(op) = _U_dyn_op(UNW_DYN_LABEL_STATE, _U_QP_TRUE, -1, 0, (label)))
+
+#define _U_dyn_op_copy_state(op, label)                                        \
+    (*(op) = _U_dyn_op(UNW_DYN_COPY_STATE, _U_QP_TRUE, -1, 0, (label)))
+
+#define _U_dyn_op_alias(op, qp, when, addr)                                    \
+    (*(op) = _U_dyn_op(UNW_DYN_ALIAS, (qp), (when), 0, (addr)))
+
+#define _U_dyn_op_stop(op)                                                     \
+    (*(op) = _U_dyn_op(UNW_DYN_STOP, _U_QP_TRUE, -1, 0, 0))
+
+/* The target-dependent qualifying predicate which is always TRUE.  On
+   IA-64, that's p0 (0), on non-predicated architectures, the value is
+   ignored.  */
+#define _U_QP_TRUE _U_TDEP_QP_TRUE
diff --git a/systrace/thirdparty/aarch64/libunwind/libunwind.h b/systrace/thirdparty/aarch64/libunwind/libunwind.h
new file mode 100644
index 0000000000000000000000000000000000000000..1624c7f5963663a56a6bcdbc4ba1ac2da31e4a73
--- /dev/null
+++ b/systrace/thirdparty/aarch64/libunwind/libunwind.h
@@ -0,0 +1,40 @@
+/* Provide a real file - not a symlink - as it would cause multiarch conflicts
+   when multiple different arch releases are installed simultaneously.  */
+
+#ifndef UNW_REMOTE_ONLY
+
+#if defined __aarch64__
+#include "libunwind-aarch64.h"
+#elif defined __arm__
+#include "libunwind-arm.h"
+#elif defined __hppa__
+#include "libunwind-hppa.h"
+#elif defined __ia64__
+#include "libunwind-ia64.h"
+#elif defined __mips__
+#include "libunwind-mips.h"
+#elif defined __powerpc__ && !defined __powerpc64__
+#include "libunwind-ppc32.h"
+#elif defined __powerpc64__
+#include "libunwind-ppc64.h"
+#elif defined __sh__
+#include "libunwind-sh.h"
+#elif defined __i386__
+#include "libunwind-x86.h"
+#elif defined __x86_64__
+#include "libunwind-x86_64.h"
+#elif defined __s390x__
+#include "libunwind-s390x.h"
+#elif defined __riscv || defined __riscv__
+#include "libunwind-riscv.h"
+#elif defined __loongarch64
+#include "libunwind-loongarch64.h"
+#else
+#error "Unsupported arch"
+#endif
+
+#else /* UNW_REMOTE_ONLY */
+
+#include "libunwind-aarch64.h"
+
+#endif /* UNW_REMOTE_ONLY */
diff --git a/systrace/thirdparty/aarch64/libunwind/unwind.h b/systrace/thirdparty/aarch64/libunwind/unwind.h
new file mode 100644
index 0000000000000000000000000000000000000000..69201dc8929eb8fcb5c63d059ca538c5fc4273a4
--- /dev/null
+++ b/systrace/thirdparty/aarch64/libunwind/unwind.h
@@ -0,0 +1,158 @@
+/* libunwind - a platform-independent unwind library
+   Copyright (C) 2003 Hewlett-Packard Co
+        Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+
+This file is part of libunwind.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#ifndef _UNWIND_H
+#define _UNWIND_H
+
+/* For uint64_t */
+#include <stdalign.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+    /* Minimal interface as per C++ ABI draft standard:
+
+            http://www.codesourcery.com/cxx-abi/abi-eh.html */
+
+    typedef enum
+    {
+        _URC_NO_REASON = 0,
+        _URC_FOREIGN_EXCEPTION_CAUGHT = 1,
+        _URC_FATAL_PHASE2_ERROR = 2,
+        _URC_FATAL_PHASE1_ERROR = 3,
+        _URC_NORMAL_STOP = 4,
+        _URC_END_OF_STACK = 5,
+        _URC_HANDLER_FOUND = 6,
+        _URC_INSTALL_CONTEXT = 7,
+        _URC_CONTINUE_UNWIND = 8
+    } _Unwind_Reason_Code;
+
+    typedef int _Unwind_Action;
+
+#define _UA_SEARCH_PHASE 1
+#define _UA_CLEANUP_PHASE 2
+#define _UA_HANDLER_FRAME 4
+#define _UA_FORCE_UNWIND 8
+
+    struct _Unwind_Context;   /* opaque data-structure */
+    struct _Unwind_Exception; /* forward-declaration */
+
+    typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code,
+                                                 struct _Unwind_Exception *);
+
+    typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action,
+                                                   uint64_t,
+                                                   struct _Unwind_Exception *,
+                                                   struct _Unwind_Context *,
+                                                   void *);
+
+    /* The C++ ABI requires exception_class, private_1, and private_2 to
+       be of type uint64 and the entire structure to be
+       double-word-aligned. Please note that exception_class stays 64-bit
+       even on 32-bit machines for gcc compatibility.  */
+    struct _Unwind_Exception
+    {
+        alignas(8) uint64_t exception_class; /* 8-byte aligned first member */
+        _Unwind_Exception_Cleanup_Fn exception_cleanup;
+        unsigned long private_1; /* NB: unsigned long, not uint64_t */
+        unsigned long private_2; /* NB: unsigned long, not uint64_t */
+    };
+
+    extern _Unwind_Reason_Code
+    _Unwind_RaiseException(struct _Unwind_Exception *);
+    extern _Unwind_Reason_Code _Unwind_ForcedUnwind(struct _Unwind_Exception *,
+                                                    _Unwind_Stop_Fn, void *);
+    extern void _Unwind_Resume(struct _Unwind_Exception *);
+    extern void _Unwind_DeleteException(struct _Unwind_Exception *);
+    extern unsigned long _Unwind_GetGR(struct _Unwind_Context *, int);
+    extern void _Unwind_SetGR(struct _Unwind_Context *, int, unsigned long);
+    extern unsigned long _Unwind_GetIP(struct _Unwind_Context *);
+    extern unsigned long _Unwind_GetIPInfo(struct _Unwind_Context *, int *);
+    extern void _Unwind_SetIP(struct _Unwind_Context *, unsigned long);
+    extern unsigned long
+    _Unwind_GetLanguageSpecificData(struct _Unwind_Context *);
+    extern unsigned long _Unwind_GetRegionStart(struct _Unwind_Context *);
+
+#ifdef _GNU_SOURCE
+
+    /* Callback for _Unwind_Backtrace().  The backtrace stops immediately
+       if the callback returns any value other than _URC_NO_REASON. */
+    typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,
+                                                    void *);
+
+/* See http://gcc.gnu.org/ml/gcc-patches/2001-09/msg00082.html for why
+   _UA_END_OF_STACK exists.  */
+#define _UA_END_OF_STACK 16
+
+    /* If the unwind was initiated due to a forced unwind, resume that
+       operation, else re-raise the exception.  This is used by
+       __cxa_rethrow().  */
+    extern _Unwind_Reason_Code
+    _Unwind_Resume_or_Rethrow(struct _Unwind_Exception *);
+
+    /* See http://gcc.gnu.org/ml/gcc-patches/2003-09/msg00154.html for why
+       _Unwind_GetBSP() exists.  */
+    extern unsigned long _Unwind_GetBSP(struct _Unwind_Context *);
+
+    /* Return the "canonical frame address" for the given context.
+       This is used by NPTL... */
+    extern unsigned long _Unwind_GetCFA(struct _Unwind_Context *);
+
+    /* Return the base-address for data references.  */
+    extern unsigned long _Unwind_GetDataRelBase(struct _Unwind_Context *);
+
+    /* Return the base-address for text references.  */
+    extern unsigned long _Unwind_GetTextRelBase(struct _Unwind_Context *);
+
+    /* Call _Unwind_Trace_Fn once for each stack-frame, without doing any
+       cleanup.  The first frame for which the callback is invoked is the
+       one for the caller of _Unwind_Backtrace().  _Unwind_Backtrace()
+       returns _URC_END_OF_STACK when the backtrace stopped due to
+       reaching the end of the call-chain or _URC_FATAL_PHASE1_ERROR if it
+       stops for any other reason.  */
+    extern _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *);
+
+    /* Find the start-address of the procedure containing the specified IP
+       or NULL if it cannot be found (e.g., because the function has no
+       unwind info).  Note: there is not necessarily a one-to-one
+       correspondence between source-level functions and procedures: some
+       functions don't have unwind-info and others are split into multiple
+       procedures.  */
+    extern void *_Unwind_FindEnclosingFunction(void *);
+
+    /* See also Linux Standard Base Spec:
+        http://www.linuxbase.org/spec/refspecs/LSB_1.3.0/gLSB/gLSB/libgcc-s.html
+     */
+
+#endif /* _GNU_SOURCE */
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif /* _UNWIND_H */
diff --git a/systrace/thirdparty/aarch64/mspti/include/mspti.h b/systrace/thirdparty/aarch64/mspti/include/mspti.h
new file mode 100644
index 0000000000000000000000000000000000000000..e83c454c11cb784c7a22f82f50127f2f9d2a368c
--- /dev/null
+++ b/systrace/thirdparty/aarch64/mspti/include/mspti.h
@@ -0,0 +1,19 @@
+/**
+ * @file mspti.h
+ *
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef MSPTI_H
+#define MSPTI_H
+
+#include "mspti_activity.h"
+#include "mspti_callback.h"
+#include "mspti_cbid.h"
+#include "mspti_result.h"
+
+#endif
diff --git a/systrace/thirdparty/aarch64/mspti/include/mspti_activity.h b/systrace/thirdparty/aarch64/mspti/include/mspti_activity.h
new file mode 100644
index 0000000000000000000000000000000000000000..30f71598d073b9637c9ec440939f30f65ef30e74
--- /dev/null
+++ b/systrace/thirdparty/aarch64/mspti/include/mspti_activity.h
@@ -0,0 +1,424 @@
+/**
+ * @file mspti_activity.h
+ *
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef MSPTI_ACTIVITY_H
+#define MSPTI_ACTIVITY_H
+
+#define ACTIVITY_STRUCT_ALIGNMENT 8
+#if defined(_WIN32)
+#define START_PACKED_ALIGNMENT __pragma(pack(push, 1))
+#define PACKED_ALIGNMENT __declspec(align(ACTIVITY_STRUCT_ALIGNMENT))
+#define END_PACKED_ALIGNMENT __pragma(pack(pop))
+#elif defined(__GNUC__)
+#define START_PACKED_ALIGNMENT
+#define PACKED_ALIGNMENT                                                       \
+    __attribute__((__packed__))                                                \
+    __attribute__((aligned(ACTIVITY_STRUCT_ALIGNMENT)))
+#define END_PACKED_ALIGNMENT
+#else
+#define START_PACKED_ALIGNMENT
+#define PACKED_ALIGNMENT
+#define END_PACKED_ALIGNMENT
+#endif
+
+#include "mspti_result.h"
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#if defined(__GNUC__) && defined(MSPTI_LIB)
+#pragma GCC visibility push(default)
+#endif
+
+    /**
+     * @brief The kinds of activity records.
+     *
+     * Each kind is associated with an
+     * activity record structure that holds the information associated
+     * with the kind.
+     */
+    typedef enum
+    {
+        /**
+         * The activity record is invalid.
+         */
+        MSPTI_ACTIVITY_KIND_INVALID = 0,
+        MSPTI_ACTIVITY_KIND_MARKER = 1, /* record type: msptiActivityMarker */
+        MSPTI_ACTIVITY_KIND_KERNEL = 2, /* record type: msptiActivityKernel */
+        MSPTI_ACTIVITY_KIND_API = 3, /* record type: msptiActivityApi */
+        MSPTI_ACTIVITY_KIND_COUNT, /* one past the last kind listed above */
+        MSPTI_ACTIVITY_KIND_FORCE_INT = 0x7fffffff /* largest 32-bit int */
+    } msptiActivityKind;
+
+    /**
+     * @brief The source kinds of mark data.
+     *
+     * Each mark activity record kind represents information about host or
+     * device
+     */
+    typedef enum
+    {
+        MSPTI_ACTIVITY_SOURCE_KIND_HOST = 0,
+        MSPTI_ACTIVITY_SOURCE_KIND_DEVICE = 1
+    } msptiActivitySourceKind;
+
+    /**
+     * @brief Flags linked to activity records.
+     *
+     * These are the Flags that pertain to activity records.
+     * Flags can be combined by bitwise OR to
+     * associate multiple flags with an activity record.
+     */
+    typedef enum
+    {
+        /**
+         * Signifies that the activity record lacks any flags.
+         */
+        MSPTI_ACTIVITY_FLAG_NONE = 0,
+        /**
+         * Represents the activity as a pure host instantaneous marker. Works
+         * with MSPTI_ACTIVITY_KIND_MARKER.
+         */
+        MSPTI_ACTIVITY_FLAG_MARKER_INSTANTANEOUS = 1 << 0,
+        /**
+         * Represents the activity as a pure host region start marker. Works
+         * with MSPTI_ACTIVITY_KIND_MARKER.
+         */
+        MSPTI_ACTIVITY_FLAG_MARKER_START = 1 << 1,
+        /**
+         * Represents the activity as a pure host region end marker. Works with
+         * MSPTI_ACTIVITY_KIND_MARKER.
+         */
+        MSPTI_ACTIVITY_FLAG_MARKER_END = 1 << 2,
+        /**
+         * Represents the activity as an instantaneous marker with device. Works
+         * with MSPTI_ACTIVITY_KIND_MARKER.
+         */
+        MSPTI_ACTIVITY_FLAG_MARKER_INSTANTANEOUS_WITH_DEVICE = 1 << 3,
+        /**
+         * Represents the activity as a pure start marker with device. Works
+         * with MSPTI_ACTIVITY_KIND_MARKER.
+         */
+        MSPTI_ACTIVITY_FLAG_MARKER_START_WITH_DEVICE = 1 << 4,
+        /**
+         * Represents the activity as a pure end marker with device. Works with
+         * MSPTI_ACTIVITY_KIND_MARKER.
+         */
+        MSPTI_ACTIVITY_FLAG_MARKER_END_WITH_DEVICE = 1 << 5
+    } msptiActivityFlag;
+
+    START_PACKED_ALIGNMENT
+
+    typedef struct PACKED_ALIGNMENT
+    {
+        msptiActivityKind kind;
+    } msptiActivity;
+
+    typedef union PACKED_ALIGNMENT
+    {
+        /**
+         * A thread object requires that we identify both the process and
+         * thread ID.
+         */
+        struct
+        {
+            uint32_t processId;
+            uint32_t threadId;
+        } pt;
+        /**
+         * A stream object requires that we identify device and stream ID.
+         */
+        struct
+        {
+            uint32_t deviceId;
+            uint32_t streamId;
+        } ds;
+    } msptiObjectId;
+
+    /**
+     * @brief This activity record serves as a marker, representing a specific
+     * moment in time.
+     *
+     * The marker is characterized by a distinctive name and a unique identifier.
+     */
+    typedef struct PACKED_ALIGNMENT
+    {
+        /**
+         * The activity record kind, must be MSPTI_ACTIVITY_KIND_MARKER.
+         */
+        msptiActivityKind kind;
+
+        /**
+         * The flags associated with the marker.
+         * @see msptiActivityFlag
+         */
+        msptiActivityFlag flag;
+
+        /**
+         * The source kinds of mark data.
+         * @see msptiActivitySourceKind
+         */
+        msptiActivitySourceKind sourceKind;
+
+        /**
+         * The timestamp for the marker, in ns. A value of 0 indicates that
+         * timestamp information could not be collected for the marker.
+         */
+        uint64_t timestamp;
+
+        /**
+         * The marker ID.
+         */
+        uint64_t id;
+
+        /**
+         * The identifier for the activity object associated with this marker.
+         * NOTE(review): presumably sourceKind selects pt vs. ds -- confirm.
+         */
+        msptiObjectId objectId;
+
+        /**
+         * The marker name for an instantaneous or start marker.
+         * This will be NULL for an end marker.
+         */
+        const char *name;
+
+        /**
+         * The name of the domain to which this marker belongs.
+         * This will be NULL for default domain.
+         */
+        const char *domain;
+    } msptiActivityMarker;
+
+    typedef struct PACKED_ALIGNMENT
+    {
+        /**
+         * The activity record kind, must be MSPTI_ACTIVITY_KIND_API.
+         */
+        msptiActivityKind kind;
+
+        /**
+         * The start timestamp for the api, in ns.
+         */
+        uint64_t start;
+
+        /**
+         * The end timestamp for the api, in ns.
+         */
+        uint64_t end;
+
+        /**
+         * A thread object requires that we identify both the process and
+         * thread ID.
+         */
+        struct
+        {
+            uint32_t processId;
+            uint32_t threadId;
+        } pt;
+
+        /**
+         * The correlation ID of the kernel.
+         */
+        uint64_t correlationId;
+
+        /**
+         * The api name.
+         */
+        const char *name;
+    } msptiActivityApi;
+
+    typedef struct PACKED_ALIGNMENT
+    {
+        /**
+         * The activity record kind, must be MSPTI_ACTIVITY_KIND_KERNEL.
+         */
+        msptiActivityKind kind;
+
+        /**
+         * The start timestamp for the kernel, in ns.
+         */
+        uint64_t start;
+
+        /**
+         * The end timestamp for the kernel, in ns.
+         */
+        uint64_t end;
+
+        /**
+         * A stream object requires that we identify device and stream ID.
+         */
+        struct
+        {
+            uint32_t deviceId;
+            uint32_t streamId;
+        } ds;
+
+        /**
+         * The correlation ID of the kernel.
+         */
+        uint64_t correlationId;
+
+        /**
+         * The kernel type.
+         */
+        const char *type;
+
+        /**
+         * The kernel name.
+         */
+        const char *name;
+    } msptiActivityKernel;
+
+    END_PACKED_ALIGNMENT
+
+    /**
+     * @brief Function type for callback used by MSPTI to request an empty
+     * buffer for storing activity records.
+     *
+     * This callback function signals the MSPTI client that an activity
+     * buffer is needed by MSPTI. The activity buffer is used by MSPTI to
+     * store activity records. The callback function can decline the
+     * request by setting *buffer to NULL. In this case MSPTI may drop
+     * activity records.
+     *
+     * @param buffer Returns the new buffer. If set to NULL then no buffer
+     * is returned.
+     * @param size Returns the size of the returned buffer.
+     * @param maxNumRecords Returns the maximum number of records that
+     * should be placed in the buffer. If 0 then the buffer is filled with
+     * as many records as possible. If > 0 the buffer is filled with at
+     * most that many records before it is returned.
+     */
+    typedef void (*msptiBuffersCallbackRequestFunc)(uint8_t **buffer,
+                                                    size_t *size,
+                                                    size_t *maxNumRecords);
+
+    /**
+     * @brief Function type for callback used by MSPTI to return a buffer
+     * of activity records.
+     *
+     * This callback function returns to the MSPTI client a buffer
+     * containing activity records.  The buffer contains @p validSize
+     * bytes of activity records which should be read using
+     * msptiActivityGetNextRecord. After this call MSPTI
+     * relinquishes ownership of the buffer and will not use it
+     * anymore. The client may return the buffer to MSPTI using the
+     * msptiBuffersCallbackRequestFunc callback.
+     *
+     * @param buffer The activity record buffer.
+     * @param size The total size of the buffer in bytes as set in
+     * msptiBuffersCallbackRequestFunc.
+     * @param validSize The number of valid bytes in the buffer.
+     */
+    typedef void (*msptiBuffersCallbackCompleteFunc)(uint8_t *buffer,
+                                                     size_t size,
+                                                     size_t validSize);
+
+    /**
+     * @brief Registers callback functions with MSPTI for activity buffer
+     * handling.
+     *
+     * This function registers two callback functions to be used in asynchronous
+     * buffer handling. If registered, activity record buffers are handled using
+     * asynchronous requested/completed callbacks from MSPTI.
+     *
+     * @param funcBufferRequested callback which is invoked when an empty
+     * buffer is requested by MSPTI
+     * @param funcBufferCompleted callback which is invoked when a buffer
+     * containing activity records is available from MSPTI
+     *
+     * @retval MSPTI_SUCCESS
+     * @retval MSPTI_ERROR_INVALID_PARAMETER if either
+     * funcBufferRequested or funcBufferCompleted is NULL
+     */
+    msptiResult msptiActivityRegisterCallbacks(
+        msptiBuffersCallbackRequestFunc funcBufferRequested,
+        msptiBuffersCallbackCompleteFunc funcBufferCompleted);
+
+    /**
+     * @brief Enable collection of a specific kind of activity record.
+     *
+     * Enable collection of a specific kind of activity record. Multiple
+     * kinds can be enabled by calling this function multiple times.
+     * By default, the collection of all activity types is inactive.
+     *
+     * @param kind The kind of activity record to collect
+     *
+     * @retval MSPTI_SUCCESS
+     */
+    msptiResult msptiActivityEnable(msptiActivityKind kind);
+
+    /**
+     * @brief Disable collection of a specific kind of activity record.
+     *
+     * Disable collection of a specific kind of activity record. Multiple
+     * kinds can be disabled by calling this function multiple times.
+     * By default, the collection of all activity types is inactive.
+     *
+     * @param kind The kind of activity record to stop collecting
+     *
+     * @retval MSPTI_SUCCESS
+     */
+    msptiResult msptiActivityDisable(msptiActivityKind kind);
+
+    /**
+     * @brief Iterate over the activity records in a buffer.
+     *
+     * This is a function to iterate over the activity records in buffer.
+     *
+     * @param buffer The buffer containing activity records
+     * @param validBufferSizeBytes The number of valid bytes in the buffer.
+     * @param record Inputs the previous record returned by
+     * msptiActivityGetNextRecord and returns the next activity record
+     * from the buffer. If input value is NULL, returns the first activity
+     * record in the buffer.
+     *
+     * @retval MSPTI_SUCCESS
+     * @retval MSPTI_ERROR_MAX_LIMIT_REACHED if no more records in the buffer
+     * @retval MSPTI_ERROR_INVALID_PARAMETER if buffer is NULL.
+     */
+    msptiResult msptiActivityGetNextRecord(uint8_t *buffer,
+                                           size_t validBufferSizeBytes,
+                                           msptiActivity **record);
+
+    /**
+     * @brief Request to deliver activity records via the buffer completion
+     * callback.
+     *
+     * This function returns the activity records associated with all
+     * contexts/streams (and the global buffers not associated with any stream)
+     * to the MSPTI client using the callback registered in
+     * msptiActivityRegisterCallbacks. It returns all activity buffers that
+     * contain completed activity records, even if these buffers are not
+     * completely filled.
+     *
+     * Before calling this function, the buffer handling callback api must be
+     * activated by calling msptiActivityRegisterCallbacks.
+     *
+     * @param flag Reserved for internal use.
+     *
+     * @retval MSPTI_SUCCESS
+     */
+    msptiResult msptiActivityFlushAll(uint32_t flag);
+
+#if defined(__GNUC__) && defined(MSPTI_LIB)
+#pragma GCC visibility pop
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/systrace/thirdparty/aarch64/mspti/include/mspti_callback.h b/systrace/thirdparty/aarch64/mspti/include/mspti_callback.h
new file mode 100644
index 0000000000000000000000000000000000000000..2e6f7ee2264b9e99f5f891fdc6ac3cd20d53bf66
--- /dev/null
+++ b/systrace/thirdparty/aarch64/mspti/include/mspti_callback.h
@@ -0,0 +1,258 @@
+/**
+ * @file mspti_callback.h
+ *
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef MSPTI_CALLBACK_H
+#define MSPTI_CALLBACK_H
+
+#include "mspti_cbid.h"
+#include "mspti_result.h"
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#if defined(__GNUC__) && defined(MSPTI_LIB)
+#pragma GCC visibility push(default)
+#endif
+
+    /**
+     * @brief Callback domains.
+     *
+     * Callback domains. Each domain represents callback points for a
+     * group of related API functions or CANN driver activity.
+     */
+    typedef enum
+    {
+        /**
+         * Invalid domain.
+         */
+        MSPTI_CB_DOMAIN_INVALID = 0,
+        /**
+         * Domain containing callback points for all runtime API functions.
+         */
+        MSPTI_CB_DOMAIN_RUNTIME = 1,
+        MSPTI_CB_DOMAIN_HCCL = 2, /* presumably HCCL API callbacks (see msptiCallbackIdHccl) - confirm */
+        MSPTI_CB_DOMAIN_SIZE,     /* == 3: one past the last valid domain */
+        MSPTI_CB_DOMAIN_FORCE_INT = 0x7fffffff /* largest 32-bit signed value; presumably pins the enum width */
+    } msptiCallbackDomain;
+
+    typedef uint32_t msptiCallbackId;
+
+    /**
+     * @brief Specifies the point in an API call that a callback is issued.
+     *
+     * Specifies the point in an API call that a callback is issued. This
+     * value is communicated to the callback function by @ref
+     * msptiCallbackData::callbackSite.
+     */
+    typedef enum
+    {
+        /**
+         * The callback is at the entry of the API call.
+         */
+        MSPTI_API_ENTER = 0,
+        /**
+         * The callback is at the exit of the API call.
+         */
+        MSPTI_API_EXIT = 1,
+        MSPTI_API_CBSITE_FORCE_INT = 0x7fffffff
+    } msptiApiCallbackSite;
+
+    typedef struct
+    {
+        /**
+         * Point in the runtime or driver function from where the callback
+         * was issued.
+         */
+        msptiApiCallbackSite callbackSite;
+
+        /**
+         * Name of the runtime or driver API function which issued the
+         * callback.
+         */
+        const char *functionName;
+
+        /**
+         * Params of the runtime or driver API function which issued the
+         * callback.
+         */
+        const void *functionParams;
+
+        /**
+         * Pointer to the return value of the runtime or driver API
+         * call.
+         */
+        const void *functionReturnValue;
+
+        /**
+         * Name of the symbol operated on by the runtime or driver API
+         * function which issued the callback. This entry is valid only for
+         * driver and runtime launch callbacks, where it returns the name of
+         * the kernel.
+         */
+        const char *symbolName;
+
+        /**
+         * The activity record correlation ID for this callback. For a
+         * runtime domain callback (i.e. @p domain
+         * MSPTI_CB_DOMAIN_RUNTIME) this ID will equal the correlation ID
+         * in the msptiActivityApi record corresponding to the CANN runtime
+         * function call. Within the callback, this ID can be recorded to
+         * correlate user data with the activity record.
+         * NOTE(review): earlier text also named a driver domain
+         * (MSPTI_CB_DOMAIN_DRIVER_API), which msptiCallbackDomain does not
+         * define - confirm whether driver-domain callbacks exist.
+         */
+        uint64_t correlationId;
+
+        /**
+         * Undefined. Reserved for internal use.
+         */
+        uint64_t reserved1;
+
+        /**
+         * Undefined. Reserved for internal use.
+         */
+        uint64_t reserved2;
+
+        /**
+         * Pointer to data shared between the entry and exit callbacks of
+         * a given runtime or driver API function invocation. This field
+         * can be used to pass 64-bit values from the entry callback to
+         * the corresponding exit callback.
+         */
+        uint64_t *correlationData;
+    } msptiCallbackData;
+
+    /**
+     * @brief Function type for a callback.
+     *
+     * Function type for a callback. The type of the data passed to the
+     * callback in @p cbdata depends on the @p domain. If @p domain is
+     * MSPTI_CB_DOMAIN_RUNTIME the type
+     * of @p cbdata will be msptiCallbackData.
+     *
+     * @param userdata User data supplied at subscription of the callback
+     * @param domain The domain of the callback
+     * @param cbid The ID of the callback
+     * @param cbdata Data passed to the callback.
+     */
+    typedef void (*msptiCallbackFunc)(void *userdata,
+                                      msptiCallbackDomain domain,
+                                      msptiCallbackId cbid,
+                                      const msptiCallbackData *cbdata);
+
+    struct msptiSubscriber_st;
+
+    /**
+     * @brief A callback subscriber.
+     */
+    typedef struct msptiSubscriber_st *msptiSubscriberHandle;
+
+    /**
+     * @brief Initialize a callback subscriber with a callback function
+     * and user data.
+     *
+     * Initializes a callback subscriber with a callback function and
+     * (optionally) a pointer to user data. The returned subscriber handle
+     * can be used to enable and disable the callback for specific domains
+     * and callback IDs.
+     * @note Only a single subscriber can be registered at a time. To ensure
+     * that no other MSPTI client interrupts the profiling session, it's the
+     * responsibility of all the MSPTI clients to call this function before
+     * starting the profiling session.
+     * @note This function does not enable any callbacks.
+     * @note @b Thread-safety: this function is thread safe.
+     *
+     * @param subscriber handle to initialize subscriber
+     * @param callback The callback function
+     * @param userdata A pointer to user data. This data will be passed to
+     * the callback function via the @p userdata parameter.
+     *
+     * @retval MSPTI_SUCCESS on success
+     * @retval MSPTI_ERROR_INNER if unable to initialize MSPTI
+     * @retval MSPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED if there is
+     * already a MSPTI subscriber
+     * @retval MSPTI_ERROR_INVALID_PARAMETER if @p subscriber is NULL
+     */
+    msptiResult msptiSubscribe(msptiSubscriberHandle *subscriber,
+                               msptiCallbackFunc callback, void *userdata);
+
+    /**
+     * @brief Unregister a callback subscriber.
+     *
+     * Removes a callback subscriber so that no future callbacks will be
+     * issued to that subscriber.
+     *
+     * @param subscriber Handle to the initialized subscriber
+     *
+     * @retval MSPTI_SUCCESS on success
+     * @retval MSPTI_ERROR_INVALID_PARAMETER if @p subscriber is NULL or not
+     * initialized
+     */
+    msptiResult msptiUnsubscribe(msptiSubscriberHandle subscriber);
+
+    /**
+     * @brief Enable or disable callbacks for a specific domain and
+     * callback ID.
+     *
+     * Enable or disable callbacks for a subscriber for a specific domain
+     * and callback ID.
+     *
+     * @note @b Thread-safety: a subscriber must serialize access to
+     * msptiEnableCallback, msptiEnableDomain.
+     *
+     * @param enable New enable state for the callback. Zero disables the
+     * callback, non-zero enables the callback.
+     * @param subscriber Handle to callback subscription
+     * @param domain The domain of the callback
+     * @param cbid The ID of the callback
+     *
+     * @retval MSPTI_SUCCESS on success
+     * @retval MSPTI_ERROR_INVALID_PARAMETER if @p subscriber, @p domain or @p
+     * cbid is invalid.
+     */
+    msptiResult msptiEnableCallback(uint32_t enable,
+                                    msptiSubscriberHandle subscriber,
+                                    msptiCallbackDomain domain,
+                                    msptiCallbackId cbid);
+
+    /**
+     * @brief Enable or disable callbacks for a specific domain.
+     *
+     * Enable or disable callbacks for a subscriber for a specific domain.
+     *
+     * @note @b Thread-safety: a subscriber must serialize access to
+     * msptiEnableCallback, msptiEnableDomain.
+     *
+     * @param enable New enable state for the callback. Zero disables the
+     * callback, non-zero enables the callback.
+     * @param subscriber Handle to callback subscription
+     * @param domain The domain of the callback
+     *
+     * @retval MSPTI_SUCCESS on success
+     * @retval MSPTI_ERROR_INVALID_PARAMETER if @p subscriber or @p domain is
+     * invalid.
+     */
+    msptiResult msptiEnableDomain(uint32_t enable,
+                                  msptiSubscriberHandle subscriber,
+                                  msptiCallbackDomain domain);
+
+#if defined(__GNUC__) && defined(MSPTI_LIB)
+#pragma GCC visibility pop
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/systrace/thirdparty/aarch64/mspti/include/mspti_cbid.h b/systrace/thirdparty/aarch64/mspti/include/mspti_cbid.h
new file mode 100644
index 0000000000000000000000000000000000000000..540ad394376e5a9f6bb74fb0a53c9072a24b1a9c
--- /dev/null
+++ b/systrace/thirdparty/aarch64/mspti/include/mspti_cbid.h
@@ -0,0 +1,83 @@
+/**
+ * @file mspti_cbid.h
+ *
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef MSPTI_CBID_H
+#define MSPTI_CBID_H
+
+/**
+ * @brief Definitions of indices for Runtime API functions, unique across entire
+ * API
+ */
+typedef enum
+{
+    MSPTI_CBID_RUNTIME_INVALID = 0,
+    MSPTI_CBID_RUNTIME_DEVICE_SET = 1,
+    MSPTI_CBID_RUNTIME_DEVICE_RESET = 2,
+    MSPTI_CBID_RUNTIME_DEVICE_SET_EX = 3,
+    MSPTI_CBID_RUNTIME_CONTEXT_CREATED_EX = 4,
+    MSPTI_CBID_RUNTIME_CONTEXT_CREATED = 5,
+    MSPTI_CBID_RUNTIME_CONTEXT_DESTROY = 6,
+    MSPTI_CBID_RUNTIME_STREAM_CREATED = 7,
+    MSPTI_CBID_RUNTIME_STREAM_DESTROY = 8,
+    MSPTI_CBID_RUNTIME_STREAM_SYNCHRONIZED = 9,
+    MSPTI_CBID_RUNTIME_LAUNCH = 10,
+    MSPTI_CBID_RUNTIME_CPU_LAUNCH = 11,
+    MSPTI_CBID_RUNTIME_AICPU_LAUNCH = 12,
+    MSPTI_CBID_RUNTIME_AIV_LAUNCH = 13,
+    MSPTI_CBID_RUNTIME_FFTS_LAUNCH = 14,
+    MSPTI_CBID_RUNTIME_MALLOC = 15,
+    MSPTI_CBID_RUNTIME_FREE = 16,
+    MSPTI_CBID_RUNTIME_MALLOC_HOST = 17,
+    MSPTI_CBID_RUNTIME_FREE_HOST = 18,
+    MSPTI_CBID_RUNTIME_MALLOC_CACHED = 19,
+    MSPTI_CBID_RUNTIME_FLUSH_CACHE = 20,
+    MSPTI_CBID_RUNTIME_INVALID_CACHE = 21,
+    MSPTI_CBID_RUNTIME_MEMCPY = 22,
+    MSPTI_CBID_RUNTIME_MEMCPY_HOST = 23,
+    MSPTI_CBID_RUNTIME_MEMCPY_ASYNC = 24,
+    MSPTI_CBID_RUNTIME_MEM_CPY2D = 25,
+    MSPTI_CBID_RUNTIME_MEM_CPY2D_ASYNC = 26,
+    MSPTI_CBID_RUNTIME_MEM_SET = 27,
+    MSPTI_CBID_RUNTIME_MEM_SET_ASYNC = 28,
+    MSPTI_CBID_RUNTIME_MEM_GET_INFO = 29,
+    MSPTI_CBID_RUNTIME_RESERVE_MEM_ADDRESS = 30,
+    MSPTI_CBID_RUNTIME_RELEASE_MEM_ADDRESS = 31,
+    MSPTI_CBID_RUNTIME_MALLOC_PHYSICAL = 32,
+    MSPTI_CBID_RUNTIME_FREE_PHYSICAL = 33,
+    MSPTI_CBID_RUNTIME_MEM_EXPORT_TO_SHAREABLE_HANDLE = 34,
+    MSPTI_CBID_RUNTIME_MEM_IMPORT_FROM_SHAREABLE_HANDLE = 35,
+    MSPTI_CBID_RUNTIME_MEM_SET_PID_TO_SHAREABLE_HANDLE = 36,
+    MSPTI_CBID_RUNTIME_SIZE, /* == 37: one past the last runtime callback ID */
+    MSPTI_CBID_RUNTIME_FORCE_INT = 0x7fffffff /* presumably pins the enum width */
+} msptiCallbackIdRuntime;
+
+/**
+ * @brief Definitions of indices for hccl API functions
+ */
+typedef enum
+{
+    MSPTI_CBID_HCCL_INVALID = 0,
+    MSPTI_CBID_HCCL_ALLREDUCE = 1,
+    MSPTI_CBID_HCCL_BROADCAST = 2,
+    MSPTI_CBID_HCCL_ALLGATHER = 3,
+    MSPTI_CBID_HCCL_REDUCE_SCATTER = 4,
+    MSPTI_CBID_HCCL_REDUCE = 5,
+    MSPTI_CBID_HCCL_ALL_TO_ALL = 6,
+    MSPTI_CBID_HCCL_ALL_TO_ALLV = 7,
+    MSPTI_CBID_HCCL_BARRIER = 8,
+    MSPTI_CBID_HCCL_SCATTER = 9,
+    MSPTI_CBID_HCCL_SEND = 10,
+    MSPTI_CBID_HCCL_RECV = 11,
+    MSPTI_CBID_HCCL_SENDRECV = 12,
+    MSPTI_CBID_HCCL_SIZE, /* == 13: one past the last hccl callback ID */
+    MSPTI_CBID_HCCL_FORCE_INT = 0x7fffffff /* presumably pins the enum width */
+} msptiCallbackIdHccl;
+
+#endif
diff --git a/systrace/thirdparty/aarch64/mspti/include/mspti_result.h b/systrace/thirdparty/aarch64/mspti/include/mspti_result.h
new file mode 100644
index 0000000000000000000000000000000000000000..902647eed2e5efc7b69f2d2dd865e228d4a22d0e
--- /dev/null
+++ b/systrace/thirdparty/aarch64/mspti/include/mspti_result.h
@@ -0,0 +1,30 @@
+/**
+ * @file mspti_result.h
+ *
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef MSPTI_BASE_H /* NOTE(review): guard name differs from file name mspti_result.h - confirm no other header uses it */
+#define MSPTI_BASE_H
+
+/**
+ * @brief MSPTI result codes.
+ *
+ * Error and result codes returned by MSPTI functions.
+ */
+typedef enum
+{
+    MSPTI_SUCCESS = 0,
+    MSPTI_ERROR_INVALID_PARAMETER = 1,
+    MSPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED = 2,
+    MSPTI_ERROR_MAX_LIMIT_REACHED = 3,
+    MSPTI_ERROR_DEVICE_OFFLINE = 4,
+    MSPTI_ERROR_INNER = 999,
+    MSPTI_ERROR_FOECE_INT = 0x7fffffff /* sic: likely meant FORCE_INT (cf. MSPTI_CBID_RUNTIME_FORCE_INT); name kept for compatibility */
+} msptiResult;
+
+#endif
diff --git a/systrace/thirdparty/aarch64/mspti/lib64/libmspti.so b/systrace/thirdparty/aarch64/mspti/lib64/libmspti.so
new file mode 100644
index 0000000000000000000000000000000000000000..c6bc165910ce21933220f48149ef4f3ba240b8dd
Binary files /dev/null and b/systrace/thirdparty/aarch64/mspti/lib64/libmspti.so differ
diff --git a/systrace/thirdparty/uthash.h b/systrace/thirdparty/uthash.h
new file mode 100644
index 0000000000000000000000000000000000000000..6d892006a8fca1a00848bb1426d0460cb060b0b5
--- /dev/null
+++ b/systrace/thirdparty/uthash.h
@@ -0,0 +1,1417 @@
+/*
+Copyright (c) 2003-2025, Troy D. Hanson  https://troydhanson.github.io/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef UTHASH_H
+#define UTHASH_H
+
+#define UTHASH_VERSION 2.3.0
+
+#include <stddef.h> /* ptrdiff_t */
+#include <stdlib.h> /* exit */
+#include <string.h> /* memcmp, memset, strlen */
+
+#if defined(HASH_NO_STDINT) && HASH_NO_STDINT
+/* The user doesn't have <stdint.h>, and must figure out their own way
+   to provide definitions for uint8_t and uint32_t. */
+#else
+#include <stdint.h> /* uint8_t, uint32_t */
+#endif
+
+/* These macros use decltype or the earlier __typeof GNU extension.
+   As decltype is only available in newer compilers (VS2010 or gcc 4.3+
+   when compiling c++ source) this code uses whatever method is needed
+   or, for VS2008 where neither is available, uses casting workarounds. */
+#if !defined(DECLTYPE) && !defined(NO_DECLTYPE)
+#if defined(_MSC_VER)                        /* MS compiler */
+#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */
+#define DECLTYPE(x) (decltype(x))
+#else /* VS2008 or older (or VS2010 in C mode) */
+#define NO_DECLTYPE
+#endif
+#elif defined(__MCST__) /* Elbrus C Compiler */
+#define DECLTYPE(x) (__typeof(x))
+#elif defined(__BORLANDC__) || defined(__ICCARM__) || defined(__LCC__) ||      \
+    defined(__WATCOMC__)
+#define NO_DECLTYPE
+#else /* GNU, Sun and other compilers */
+#define DECLTYPE(x) (__typeof(x))
+#endif
+#endif
+
+#ifdef NO_DECLTYPE
+#define DECLTYPE(x)
+#define DECLTYPE_ASSIGN(dst, src)                                              \
+    do                                                                         \
+    {                                                                          \
+        char **_da_dst = (char **)(&(dst));                                    \
+        *_da_dst = (char *)(src);                                              \
+    } while (0)
+#else
+#define DECLTYPE_ASSIGN(dst, src)                                              \
+    do                                                                         \
+    {                                                                          \
+        (dst) = DECLTYPE(dst)(src);                                            \
+    } while (0)
+#endif
+
+#ifndef uthash_malloc
+#define uthash_malloc(sz) malloc(sz) /* malloc fcn                      */
+#endif
+#ifndef uthash_free
+#define uthash_free(ptr, sz) free(ptr) /* free fcn                        */
+#endif
+#ifndef uthash_bzero
+#define uthash_bzero(a, n) memset(a, '\0', n)
+#endif
+#ifndef uthash_strlen
+#define uthash_strlen(s) strlen(s)
+#endif
+
+#ifndef HASH_FUNCTION
+#define HASH_FUNCTION(keyptr, keylen, hashv) HASH_JEN(keyptr, keylen, hashv)
+#endif
+
+#ifndef HASH_KEYCMP
+#define HASH_KEYCMP(a, b, n) memcmp(a, b, n)
+#endif
+
+#ifndef uthash_noexpand_fyi
+#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand  */
+#endif
+#ifndef uthash_expand_fyi
+#define uthash_expand_fyi(tbl) /* can be defined to log expands   */
+#endif
+
+#ifndef HASH_NONFATAL_OOM
+#define HASH_NONFATAL_OOM 0
+#endif
+
+#if HASH_NONFATAL_OOM
+/* malloc failures can be recovered from */
+
+#ifndef uthash_nonfatal_oom
+#define uthash_nonfatal_oom(obj)                                               \
+    do                                                                         \
+    {                                                                          \
+    } while (0) /* non-fatal OOM error */
+#endif
+
+#define HASH_RECORD_OOM(oomed)                                                 \
+    do                                                                         \
+    {                                                                          \
+        (oomed) = 1;                                                           \
+    } while (0)
+#define IF_HASH_NONFATAL_OOM(x) x
+
+#else
+/* malloc failures result in lost memory, hash tables are unusable */
+
+#ifndef uthash_fatal
+#define uthash_fatal(msg) exit(-1) /* fatal OOM error */
+#endif
+
+#define HASH_RECORD_OOM(oomed) uthash_fatal("out of memory")
+#define IF_HASH_NONFATAL_OOM(x)
+
+#endif
+
+/* initial number of buckets */
+#define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets        */
+#define HASH_INITIAL_NUM_BUCKETS_LOG2                                          \
+    5U                               /* lg2 of initial number of buckets       \
+                                      */
+#define HASH_BKT_CAPACITY_THRESH 10U /* expand when bucket count reaches */
+
+/* calculate the element whose hash handle address is hhp */
+#define ELMT_FROM_HH(tbl, hhp) ((void *)(((char *)(hhp)) - ((tbl)->hho)))
+/* calculate the hash handle from element address elp */
+#define HH_FROM_ELMT(tbl, elp)                                                 \
+    ((UT_hash_handle *)(void *)(((char *)(elp)) + ((tbl)->hho)))
+
+#define HASH_ROLLBACK_BKT(hh, head, itemptrhh)                                 \
+    do                                                                         \
+    {                                                                          \
+        struct UT_hash_handle *_hd_hh_item = (itemptrhh);                      \
+        unsigned _hd_bkt;                                                      \
+        HASH_TO_BKT(_hd_hh_item->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
+        (head)->hh.tbl->buckets[_hd_bkt].count++;                              \
+        _hd_hh_item->hh_next = NULL;                                           \
+        _hd_hh_item->hh_prev = NULL;                                           \
+    } while (0)
+
+#define HASH_VALUE(keyptr, keylen, hashv)                                      \
+    do                                                                         \
+    {                                                                          \
+        HASH_FUNCTION(keyptr, keylen, hashv);                                  \
+    } while (0)
+
+#define HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, hashval, out)          \
+    do                                                                         \
+    {                                                                          \
+        (out) = NULL;                                                          \
+        if (head)                                                              \
+        {                                                                      \
+            unsigned _hf_bkt;                                                  \
+            HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt);        \
+            if (HASH_BLOOM_TEST((head)->hh.tbl, hashval))                      \
+            {                                                                  \
+                HASH_FIND_IN_BKT((head)->hh.tbl, hh,                           \
+                                 (head)->hh.tbl->buckets[_hf_bkt], keyptr,     \
+                                 keylen, hashval, out);                        \
+            }                                                                  \
+        }                                                                      \
+    } while (0)
+
+#define HASH_FIND(hh, head, keyptr, keylen, out)                               \
+    do                                                                         \
+    {                                                                          \
+        (out) = NULL;                                                          \
+        if (head)                                                              \
+        {                                                                      \
+            unsigned _hf_hashv;                                                \
+            HASH_VALUE(keyptr, keylen, _hf_hashv);                             \
+            HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out);   \
+        }                                                                      \
+    } while (0)
+
+#ifdef HASH_BLOOM
+#define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) /* Bloom filter size in bits */
+#define HASH_BLOOM_BYTELEN /* bits rounded up to whole bytes; fully parenthesized so it is safe inside larger expressions */ \
+    ((HASH_BLOOM_BITLEN / 8UL) + (((HASH_BLOOM_BITLEN % 8UL) != 0UL) ? 1UL : 0UL))
+#define HASH_BLOOM_MAKE(tbl, oomed)                                            \
+    do                                                                         \
+    {                                                                          \
+        (tbl)->bloom_nbits = HASH_BLOOM;                                       \
+        (tbl)->bloom_bv = (uint8_t *)uthash_malloc(HASH_BLOOM_BYTELEN);        \
+        if (!(tbl)->bloom_bv)                                                  \
+        {                                                                      \
+            HASH_RECORD_OOM(oomed);                                            \
+        }                                                                      \
+        else                                                                   \
+        {                                                                      \
+            uthash_bzero((tbl)->bloom_bv, HASH_BLOOM_BYTELEN);                 \
+            (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE;                           \
+        }                                                                      \
+    } while (0)
+
+#define HASH_BLOOM_FREE(tbl)                                                   \
+    do                                                                         \
+    {                                                                          \
+        uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN);                      \
+    } while (0)
+
+#define HASH_BLOOM_BITSET(bv, idx) (bv[(idx) / 8U] |= (1U << ((idx) % 8U)))
+#define HASH_BLOOM_BITTEST(bv, idx)                                            \
+    ((bv[(idx) / 8U] & (1U << ((idx) % 8U))) != 0)
+
+#define HASH_BLOOM_ADD(tbl, hashv)                                             \
+    HASH_BLOOM_BITSET(                                                         \
+        (tbl)->bloom_bv,                                                       \
+        ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U)))
+
+#define HASH_BLOOM_TEST(tbl, hashv)                                            \
+    HASH_BLOOM_BITTEST(                                                        \
+        (tbl)->bloom_bv,                                                       \
+        ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U)))
+
+#else
+#define HASH_BLOOM_MAKE(tbl, oomed)
+#define HASH_BLOOM_FREE(tbl)
+#define HASH_BLOOM_ADD(tbl, hashv)
+#define HASH_BLOOM_TEST(tbl, hashv) 1
+#define HASH_BLOOM_BYTELEN 0U
+#endif
+/* Allocate and zero the UT_hash_table and its initial buckets for head. */
+#define HASH_MAKE_TABLE(hh, head, oomed)                                       \
+    do                                                                         \
+    {                                                                          \
+        (head)->hh.tbl =                                                       \
+            (UT_hash_table *)uthash_malloc(sizeof(UT_hash_table));             \
+        if (!(head)->hh.tbl)                                                   \
+        { /* table header allocation failed */                                 \
+            HASH_RECORD_OOM(oomed);                                            \
+        }                                                                      \
+        else                                                                   \
+        {                                                                      \
+            uthash_bzero((head)->hh.tbl, sizeof(UT_hash_table));               \
+            (head)->hh.tbl->tail = &((head)->hh);                              \
+            (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS;            \
+            (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2;  \
+            (head)->hh.tbl->hho = (char *)(&(head)->hh) - (char *)(head);      \
+            (head)->hh.tbl->buckets = (UT_hash_bucket *)uthash_malloc(         \
+                HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket));     \
+            (head)->hh.tbl->signature = HASH_SIGNATURE;                        \
+            if (!(head)->hh.tbl->buckets)                                      \
+            { /* no bucket array: release the table header */                  \
+                HASH_RECORD_OOM(oomed);                                        \
+                uthash_free((head)->hh.tbl, sizeof(UT_hash_table));            \
+            }                                                                  \
+            else                                                               \
+            { /* both allocations succeeded */                                 \
+                uthash_bzero((head)->hh.tbl->buckets,                          \
+                             HASH_INITIAL_NUM_BUCKETS *                        \
+                                 sizeof(struct UT_hash_bucket));               \
+                HASH_BLOOM_MAKE((head)->hh.tbl, oomed);                        \
+                IF_HASH_NONFATAL_OOM(if (oomed) {                              \
+                    uthash_free((head)->hh.tbl->buckets,                       \
+                                HASH_INITIAL_NUM_BUCKETS *                     \
+                                    sizeof(struct UT_hash_bucket));            \
+                    uthash_free((head)->hh.tbl, sizeof(UT_hash_table));        \
+                })                                                             \
+            }                                                                  \
+        }                                                                      \
+    } while (0)
+/* Delete the element found for add's key, then add add via the INORDER add. */
+#define HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in,       \
+                                         hashval, add, replaced, cmpfcn)       \
+    do                                                                         \
+    {                                                                          \
+        (replaced) = NULL;                                                     \
+        HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in,        \
+                              hashval, replaced);                              \
+        if ((replaced) != NULL)                                                \
+        { /* same key already stored: unlink the old element */                \
+            HASH_DELETE(hh, head, replaced);                                   \
+        }                                                                      \
+        HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname),     \
+                                            keylen_in, hashval, add, cmpfcn);  \
+    } while (0)
+/* Delete the element found for add's key, then add add via the plain add. */
+#define HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, hashval, add, \
+                                 replaced)                                     \
+    do                                                                         \
+    {                                                                          \
+        (replaced) = NULL;                                                     \
+        HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in,        \
+                              hashval, replaced);                              \
+        if ((replaced) != NULL)                                                \
+        { /* same key already stored: unlink the old element */                \
+            HASH_DELETE(hh, head, replaced);                                   \
+        }                                                                      \
+        HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in,  \
+                                    hashval, add);                             \
+    } while (0)
+/* HASH_REPLACE_BYHASHVALUE with the hash computed from add's key field. */
+#define HASH_REPLACE(hh, head, fieldname, keylen_in, add, replaced)            \
+    do                                                                         \
+    {                                                                          \
+        unsigned _hr_hv; /* filled in by HASH_VALUE */                         \
+        HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hv);                    \
+        HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hv, add,  \
+                                 replaced);                                    \
+    } while (0)
+/* HASH_REPLACE_BYHASHVALUE_INORDER with the hash computed from add's key. */
+#define HASH_REPLACE_INORDER(hh, head, fieldname, keylen_in, add, replaced,    \
+                             cmpfcn)                                           \
+    do                                                                         \
+    {                                                                          \
+        unsigned _hr_hv; /* filled in by HASH_VALUE */                         \
+        HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hv);                    \
+        HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in,       \
+                                         _hr_hv, add, replaced, cmpfcn);       \
+    } while (0)
+/* Link add behind the current tail of head's app-order list. */
+#define HASH_APPEND_LIST(hh, head, add)                                        \
+    do                                                                         \
+    {                                                                          \
+        (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail);   \
+        (add)->hh.next = NULL;                                                 \
+        (head)->hh.tbl->tail->next = (add);                                    \
+        (head)->hh.tbl->tail = &((add)->hh);                                   \
+    } while (0)
+/* Stop the caller's _hs_iter at the first element that cmpfcn ranks > add. */
+#define HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn)                            \
+    do                                                                         \
+    {                                                                          \
+        do                                                                     \
+        { /* leaves _hs_iter NULL when no element compares greater */          \
+            if (cmpfcn(DECLTYPE(head)(_hs_iter), add) > 0)                     \
+            {                                                                  \
+                break;                                                         \
+            }                                                                  \
+        } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next));   \
+    } while (0)
+/* Without DECLTYPE, head is lent to cmpfcn as the cursor and then restored. */
+#ifdef NO_DECLTYPE
+#undef HASH_AKBI_INNER_LOOP
+#define HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn)                            \
+    do                                                                         \
+    {                                                                          \
+        char *_hs_saved_head = (char *)(head);                                 \
+        do                                                                     \
+        {                                                                      \
+            DECLTYPE_ASSIGN(head, _hs_iter);                                   \
+            if (cmpfcn(head, add) > 0)                                         \
+            {                                                                  \
+                DECLTYPE_ASSIGN(head, _hs_saved_head);                         \
+                break;                                                         \
+            }                                                                  \
+            DECLTYPE_ASSIGN(head, _hs_saved_head);                             \
+        } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next));   \
+    } while (0)
+#endif
+
+#if HASH_NONFATAL_OOM
+/* Index add in its bucket; on OOM undo the insertion and report it instead. */
+#define HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, oomed)    \
+    do                                                                         \
+    {                                                                          \
+        if (!(oomed))                                                          \
+        {                                                                      \
+            unsigned _ha_bkt;                                                  \
+            (head)->hh.tbl->num_items++;                                       \
+            HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt);        \
+            HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh,  \
+                            oomed);                                            \
+            if (oomed)                                                         \
+            { /* bucket insert failed: roll back and notify */                 \
+                HASH_ROLLBACK_BKT(hh, head, &(add)->hh);                       \
+                HASH_DELETE_HH(hh, head, &(add)->hh);                          \
+                (add)->hh.tbl = NULL;                                          \
+                uthash_nonfatal_oom(add);                                      \
+            }                                                                  \
+            else                                                               \
+            {                                                                  \
+                HASH_BLOOM_ADD((head)->hh.tbl, hashval);                       \
+                HASH_EMIT_KEY(hh, head, keyptr, keylen_in);                    \
+            }                                                                  \
+        }                                                                      \
+        else                                                                   \
+        { /* oomed was already set on entry */                                 \
+            (add)->hh.tbl = NULL;                                              \
+            uthash_nonfatal_oom(add);                                          \
+        }                                                                      \
+    } while (0)
+
+#else
+/* Index add in its bucket, then run the Bloom-add and key-emit hooks. */
+#define HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, oomed)    \
+    do                                                                         \
+    {                                                                          \
+        unsigned _ha_bkt;                                                      \
+        (head)->hh.tbl->num_items++;                                           \
+        HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt);            \
+        HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh,      \
+                        oomed);                                                \
+        HASH_BLOOM_ADD((head)->hh.tbl, hashval);                               \
+        HASH_EMIT_KEY(hh, head, keyptr, keylen_in);                            \
+    } while (0)
+
+#endif
+/* Insert add before the first element that cmpfcn ranks after it. */
+#define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in,       \
+                                            hashval, add, cmpfcn)              \
+    do                                                                         \
+    {                                                                          \
+        IF_HASH_NONFATAL_OOM(int _ha_oomed = 0;)                               \
+        (add)->hh.hashv = (hashval);                                           \
+        (add)->hh.key = (const void *)(keyptr);                                \
+        (add)->hh.keylen = (unsigned)(keylen_in);                              \
+        if (!(head))                                                           \
+        {                                                                      \
+            (add)->hh.next = NULL;                                             \
+            (add)->hh.prev = NULL;                                             \
+            HASH_MAKE_TABLE(hh, add, _ha_oomed);                               \
+            /* Non-fatal OOM builds set head only if the table exists. */      \
+            IF_HASH_NONFATAL_OOM(if (!_ha_oomed) { )                           \
+            (head) = (add);                                                    \
+            IF_HASH_NONFATAL_OOM( })                                           \
+        }                                                                      \
+        else                                                                   \
+        {                                                                      \
+            void *_hs_iter = (head);                                           \
+            (add)->hh.tbl = (head)->hh.tbl;                                    \
+            HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn);                       \
+            if (_hs_iter)                                                      \
+            {                                                                  \
+                (add)->hh.next = _hs_iter;                                     \
+                if (((add)->hh.prev =                                          \
+                         HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev))        \
+                {                                                              \
+                    HH_FROM_ELMT((head)->hh.tbl, (add)->hh.prev)->next =       \
+                        (add);                                                 \
+                }                                                              \
+                else                                                           \
+                {                                                              \
+                    (head) = (add);                                            \
+                }                                                              \
+                HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev = (add);          \
+            }                                                                  \
+            else                                                               \
+            {                                                                  \
+                HASH_APPEND_LIST(hh, head, add);                               \
+            }                                                                  \
+        }                                                                      \
+        HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add,           \
+                          _ha_oomed);                                          \
+        HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE_INORDER");            \
+    } while (0)
+/* HASH_ADD_KEYPTR_BYHASHVALUE_INORDER with the hash computed from keyptr. */
+#define HASH_ADD_KEYPTR_INORDER(hh, head, keyptr, keylen_in, add, cmpfcn)      \
+    do                                                                         \
+    {                                                                          \
+        unsigned _hs_hv; /* filled in by HASH_VALUE */                         \
+        HASH_VALUE(keyptr, keylen_in, _hs_hv);                                 \
+        HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in,       \
+                                            _hs_hv, add, cmpfcn);              \
+    } while (0)
+/* INORDER add keyed by the member fieldname of add, with a supplied hash. */
+#define HASH_ADD_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, hashval,  \
+                                     add, cmpfcn)                              \
+    HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname),         \
+                                        keylen_in, hashval, add, cmpfcn)
+/* INORDER add keyed by the member fieldname of add (no hash supplied). */
+#define HASH_ADD_INORDER(hh, head, fieldname, keylen_in, add, cmpfcn)          \
+    HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add,     \
+                            cmpfcn)
+/* Append add to head's list and index it, creating the table when empty. */
+#define HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, hashval, add) \
+    do                                                                         \
+    {                                                                          \
+        IF_HASH_NONFATAL_OOM(int _ha_oomed = 0;)                               \
+        (add)->hh.hashv = (hashval);                                           \
+        (add)->hh.key = (const void *)(keyptr);                                \
+        (add)->hh.keylen = (unsigned)(keylen_in);                              \
+        if (!(head))                                                           \
+        { /* first element: it carries the freshly made table */               \
+            (add)->hh.next = NULL;                                             \
+            (add)->hh.prev = NULL;                                             \
+            HASH_MAKE_TABLE(hh, add, _ha_oomed);                               \
+            /* Non-fatal OOM builds set head only if the table exists. */      \
+            IF_HASH_NONFATAL_OOM(if (!_ha_oomed) { )                           \
+            (head) = (add);                                                    \
+            IF_HASH_NONFATAL_OOM( })                                           \
+        }                                                                      \
+        else                                                                   \
+        { /* share head's table and go to the end of the list */               \
+            (add)->hh.tbl = (head)->hh.tbl;                                    \
+            HASH_APPEND_LIST(hh, head, add);                                   \
+        }                                                                      \
+        HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add,           \
+                          _ha_oomed);                                          \
+        HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE");                    \
+    } while (0)
+/* HASH_ADD_KEYPTR_BYHASHVALUE with the hash computed from keyptr. */
+#define HASH_ADD_KEYPTR(hh, head, keyptr, keylen_in, add)                      \
+    do                                                                         \
+    {                                                                          \
+        unsigned _ha_hv; /* filled in by HASH_VALUE */                         \
+        HASH_VALUE(keyptr, keylen_in, _ha_hv);                                 \
+        HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hv,       \
+                                    add);                                      \
+    } while (0)
+/* Add keyed by the member fieldname of add, with a caller-supplied hash. */
+#define HASH_ADD_BYHASHVALUE(hh, head, fieldname, keylen_in, hashval, add)     \
+    HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in,      \
+                                hashval, add)
+/* Add keyed by the member fieldname of add; forwards to HASH_ADD_KEYPTR. */
+#define HASH_ADD(hh, head, fieldname, keylen_in, add)                          \
+    HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add)
+/* Store in bkt the value of hashv masked with (num_bkts - 1). */
+#define HASH_TO_BKT(hashv, num_bkts, bkt)                                      \
+    do                                                                         \
+    {                                                                          \
+        bkt = (((num_bkts) - 1U) & (hashv));                                   \
+    } while (0)
+
+/* Delete "delptr" from the hash table, with the usual patch-up of the
+ * app-order doubly-linked list.
+ * The use of _hd_hh_del in HASH_DELETE_HH deserves special explanation.
+ * The patch-up used to be expressed using (delptr), but that led to a bug
+ * if someone used the same symbol for the head and the deletee, like
+ *  HASH_DELETE(hh,users,users);
+ * We want that to work, but by changing the head (users) during deletion
+ * we were forfeiting our ability to further refer to the deletee (users)
+ * in the patch-up process. Solution: use scratch space to copy the
+ * deletee pointer, so that later references go through that scratch
+ * pointer rather than through the repointed (users) symbol.
+ */
+#define HASH_DELETE(hh, head, delptr) HASH_DELETE_HH(hh, head, &(delptr)->hh)
+/* Unlink the element owning delptrhh; free the table if it was the last. */
+#define HASH_DELETE_HH(hh, head, delptrhh)                                     \
+    do                                                                         \
+    {                                                                          \
+        const struct UT_hash_handle *_hd_hh_del = (delptrhh);                  \
+        if ((_hd_hh_del->prev == NULL) && (_hd_hh_del->next == NULL))          \
+        { /* sole element: tear down filter, buckets and table */              \
+            HASH_BLOOM_FREE((head)->hh.tbl);                                   \
+            uthash_free((head)->hh.tbl->buckets,                               \
+                        (head)->hh.tbl->num_buckets *                          \
+                            sizeof(struct UT_hash_bucket));                    \
+            uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                \
+            (head) = NULL;                                                     \
+        }                                                                      \
+        else                                                                   \
+        {                                                                      \
+            unsigned _hd_bkt;                                                  \
+            if (_hd_hh_del == (head)->hh.tbl->tail)                            \
+            { /* deleting the tail: its predecessor becomes tail */            \
+                (head)->hh.tbl->tail =                                         \
+                    HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev);            \
+            }                                                                  \
+            if (_hd_hh_del->prev != NULL)                                      \
+            {                                                                  \
+                HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev)->next =         \
+                    _hd_hh_del->next;                                          \
+            }                                                                  \
+            else                                                               \
+            { /* deleting the first element: advance head */                   \
+                DECLTYPE_ASSIGN(head, _hd_hh_del->next);                       \
+            }                                                                  \
+            if (_hd_hh_del->next != NULL)                                      \
+            {                                                                  \
+                HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->next)->prev =         \
+                    _hd_hh_del->prev;                                          \
+            }                                                                  \
+            HASH_TO_BKT(_hd_hh_del->hashv, (head)->hh.tbl->num_buckets,        \
+                        _hd_bkt);                                              \
+            HASH_DEL_IN_BKT((head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del);     \
+            (head)->hh.tbl->num_items--;                                       \
+        }                                                                      \
+        HASH_FSCK(hh, head, "HASH_DELETE_HH");                                 \
+    } while (0)
+/* Convenience forms of HASH_FIND/HASH_ADD/HASH_REPLACE/HASH_DEL for a handle
+ * member named hh. The _STR forms measure the key with uthash_strlen. */
+#define HASH_FIND_STR(head, findstr, out)                                      \
+    do                                                                         \
+    {                                                                          \
+        unsigned _uthash_hfstr_len = (unsigned)uthash_strlen(findstr);         \
+        HASH_FIND(hh, head, findstr, _uthash_hfstr_len, out);                  \
+    } while (0)
+#define HASH_ADD_STR(head, strfield, add)                                      \
+    do                                                                         \
+    {                                                                          \
+        /* key starts at strfield[0]; length comes from uthash_strlen */       \
+        unsigned _uthash_hastr_len = (unsigned)uthash_strlen((add)->strfield); \
+        HASH_ADD(hh, head, strfield[0], _uthash_hastr_len, add);               \
+    } while (0)
+#define HASH_REPLACE_STR(head, strfield, add, replaced)                        \
+    do                                                                         \
+    {                                                                          \
+        /* key starts at strfield[0]; length comes from uthash_strlen */       \
+        unsigned _uthash_hrstr_len = (unsigned)uthash_strlen((add)->strfield); \
+        HASH_REPLACE(hh, head, strfield[0], _uthash_hrstr_len, add,            \
+                     replaced);                                                \
+    } while (0)
+#define HASH_FIND_INT(head, findint, out) /* findint: address of an int key */ \
+    HASH_FIND(hh, head, findint, sizeof(int), out)
+#define HASH_ADD_INT(head, intfield, add) /* intfield: int key member */       \
+    HASH_ADD(hh, head, intfield, sizeof(int), add)
+#define HASH_REPLACE_INT(head, intfield, add, replaced) /* int key member */   \
+    HASH_REPLACE(hh, head, intfield, sizeof(int), add, replaced)
+#define HASH_FIND_PTR(head, findptr, out) /* findptr: address of a ptr key */  \
+    HASH_FIND(hh, head, findptr, sizeof(void *), out)
+#define HASH_ADD_PTR(head, ptrfield, add) /* ptrfield: ptr key member */       \
+    HASH_ADD(hh, head, ptrfield, sizeof(void *), add)
+#define HASH_REPLACE_PTR(head, ptrfield, add, replaced) /* ptr key member */   \
+    HASH_REPLACE(hh, head, ptrfield, sizeof(void *), add, replaced)
+#define HASH_DEL(head, delptr) HASH_DELETE(hh, head, delptr)
+/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is
+ * defined: bucket chain back-links and counts, then the app-order list's
+ * back-links and item count. On the first mismatch HASH_OOPS prints to stderr
+ * and exits. This is for uthash developers only; it compiles away if
+ * HASH_DEBUG isn't defined. */
+#ifdef HASH_DEBUG
+#include <stdio.h> /* fprintf, stderr */
+#define HASH_OOPS(...)                                                         \
+    do                                                                         \
+    {                                                                          \
+        fprintf(stderr, __VA_ARGS__);                                          \
+        exit(-1);                                                              \
+    } while (0)
+#define HASH_FSCK(hh, head, where)                                             \
+    do                                                                         \
+    {                                                                          \
+        struct UT_hash_handle *_thh;                                           \
+        if (head)                                                              \
+        {                                                                      \
+            unsigned _bkt_i;                                                   \
+            unsigned _count = 0;                                               \
+            char *_prev;                                                       \
+            for (_bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; ++_bkt_i)   \
+            { /* audit one bucket chain */                                     \
+                unsigned _bkt_count = 0;                                       \
+                _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head;                \
+                _prev = NULL;                                                  \
+                while (_thh)                                                   \
+                {                                                              \
+                    if (_prev != (char *)(_thh->hh_prev))                      \
+                    {                                                          \
+                        HASH_OOPS("%s: invalid hh_prev %p, actual %p\n",       \
+                                  (where), (void *)_thh->hh_prev,              \
+                                  (void *)_prev);                              \
+                    }                                                          \
+                    _bkt_count++;                                              \
+                    _prev = (char *)(_thh);                                    \
+                    _thh = _thh->hh_next;                                      \
+                }                                                              \
+                _count += _bkt_count;                                          \
+                if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count)       \
+                {                                                              \
+                    HASH_OOPS("%s: invalid bucket count %u, actual %u\n",      \
+                              (where), (head)->hh.tbl->buckets[_bkt_i].count,  \
+                              _bkt_count);                                     \
+                }                                                              \
+            }                                                                  \
+            if (_count != (head)->hh.tbl->num_items)                           \
+            {                                                                  \
+                HASH_OOPS("%s: invalid hh item count %u, actual %u\n",         \
+                          (where), (head)->hh.tbl->num_items, _count);         \
+            }                                                                  \
+            _count = 0;                                                        \
+            _prev = NULL;                                                      \
+            _thh = &(head)->hh;                                                \
+            while (_thh)                                                       \
+            { /* audit the app-order list */                                   \
+                _count++;                                                      \
+                if (_prev != (char *)_thh->prev)                               \
+                {                                                              \
+                    HASH_OOPS("%s: invalid prev %p, actual %p\n", (where),     \
+                              (void *)_thh->prev, (void *)_prev);              \
+                }                                                              \
+                _prev = (char *)ELMT_FROM_HH((head)->hh.tbl, _thh);            \
+                _thh = (_thh->next ? HH_FROM_ELMT((head)->hh.tbl, _thh->next)  \
+                                   : NULL);                                    \
+            }                                                                  \
+            if (_count != (head)->hh.tbl->num_items)                           \
+            {                                                                  \
+                HASH_OOPS("%s: invalid app item count %u, actual %u\n",        \
+                          (where), (head)->hh.tbl->num_items, _count);         \
+            }                                                                  \
+        }                                                                      \
+    } while (0)
+#else
+#define HASH_FSCK(hh, head, where)
+#endif
+
+/* When compiled with -DHASH_EMIT_KEYS=<fd>, each key is written to file
+ * descriptor <fd> as an unsigned length followed by the key bytes (useful for
+ * tuning the hash function). #include <unistd.h> for write(2)'s prototype. */
+#ifdef HASH_EMIT_KEYS
+#define HASH_EMIT_KEY(hh, head, keyptr, fieldlen)                              \
+    do                                                                         \
+    {                                                                          \
+        unsigned _klen = (unsigned)(fieldlen);                                 \
+        write(HASH_EMIT_KEYS, &_klen, sizeof(_klen));                          \
+        write(HASH_EMIT_KEYS, (keyptr), (unsigned long)(fieldlen));            \
+    } while (0)
+#else
+#define HASH_EMIT_KEY(hh, head, keyptr, fieldlen)
+#endif
+
+/* The Bernstein hash function, used in Perl prior to v5.6: for each key byte,
+ * hashv = hashv*33 + byte, starting from 0. Note ((x<<5)+x) == x*33. */
+#define HASH_BER(key, keylen, hashv)                                           \
+    do                                                                         \
+    {                                                                          \
+        unsigned _hb_keylen = (unsigned)(keylen);                              \
+        const unsigned char *_hb_key = (const unsigned char *)(key);           \
+        (hashv) = 0;                                                           \
+        while (_hb_keylen-- != 0U)                                             \
+        {                                                                      \
+            (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++;                 \
+        }                                                                      \
+    } while (0)
+
+/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
+ * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
+ * (archive link: https://archive.is/Ivcan ).
+ * HASH_SAX is the shift-add-xor hash of keylen bytes at key, left in hashv. */
+#define HASH_SAX(key, keylen, hashv)                                           \
+    do                                                                         \
+    {                                                                          \
+        unsigned _sx_i;                                                        \
+        const unsigned char *_hs_key = (const unsigned char *)(key);           \
+        (hashv) = 0;                                                           \
+        for (_sx_i = 0; _sx_i < (keylen); _sx_i++)                             \
+        {                                                                      \
+            (hashv) ^= ((hashv) << 5) + ((hashv) >> 2) + _hs_key[_sx_i];       \
+        }                                                                      \
+    } while (0)
+/* FNV-1a variation: per key byte, hashv = (hashv ^ byte) * 16777619U. */
+#define HASH_FNV(key, keylen, hashv)                                           \
+    do                                                                         \
+    {                                                                          \
+        unsigned _fn_i;                                                        \
+        const unsigned char *_hf_key = (const unsigned char *)(key);           \
+        (hashv) = 2166136261U;                                                 \
+        for (_fn_i = 0; _fn_i < (keylen); _fn_i++)                             \
+        {                                                                      \
+            (hashv) = (hashv) ^ _hf_key[_fn_i];                                \
+            (hashv) = (hashv) * 16777619U;                                     \
+        }                                                                      \
+    } while (0)
+/* OAT (one-at-a-time) hash: fold in each key byte, then a final scramble. */
+#define HASH_OAT(key, keylen, hashv)                                           \
+    do                                                                         \
+    {                                                                          \
+        unsigned _ho_i;                                                        \
+        const unsigned char *_ho_key = (const unsigned char *)(key);           \
+        (hashv) = 0;                                                           \
+        for (_ho_i = 0; _ho_i < (keylen); _ho_i++)                             \
+        {                                                                      \
+            (hashv) += _ho_key[_ho_i];                                         \
+            (hashv) += ((hashv) << 10);                                        \
+            (hashv) ^= ((hashv) >> 6);                                         \
+        }                                                                      \
+        (hashv) += ((hashv) << 3);                                             \
+        (hashv) ^= ((hashv) >> 11);                                            \
+        (hashv) += ((hashv) << 15);                                            \
+    } while (0)
+/* Mix step for HASH_JEN: updates a, b and c in place via sub/xor/shift. */
+#define HASH_JEN_MIX(a, b, c)                                                  \
+    do                                                                         \
+    {                                                                          \
+        (a) -= (b);                                                            \
+        (a) -= (c);                                                            \
+        (a) ^= ((c) >> 13);                                                    \
+        (b) -= (c);                                                            \
+        (b) -= (a);                                                            \
+        (b) ^= ((a) << 8);                                                     \
+        (c) -= (a);                                                            \
+        (c) -= (b);                                                            \
+        (c) ^= ((b) >> 13);                                                    \
+        (a) -= (b);                                                            \
+        (a) -= (c);                                                            \
+        (a) ^= ((c) >> 12);                                                    \
+        (b) -= (c);                                                            \
+        (b) -= (a);                                                            \
+        (b) ^= ((a) << 16);                                                    \
+        (c) -= (a);                                                            \
+        (c) -= (b);                                                            \
+        (c) ^= ((b) >> 5);                                                     \
+        (a) -= (b);                                                            \
+        (a) -= (c);                                                            \
+        (a) ^= ((c) >> 3);                                                     \
+        (b) -= (c);                                                            \
+        (b) -= (a);                                                            \
+        (b) ^= ((a) << 10);                                                    \
+        (c) -= (a);                                                            \
+        (c) -= (b);                                                            \
+        (c) ^= ((b) >> 15);                                                    \
+    } while (0)
+/* HASH_JEN: hash keylen bytes at key into hashv, 12 bytes per mix round. */
+#define HASH_JEN(key, keylen, hashv)                                           \
+    do                                                                         \
+    {                                                                          \
+        unsigned _hj_i, _hj_j, _hj_k;                                          \
+        unsigned const char *_hj_key = (unsigned const char *)(key);           \
+        hashv = 0xfeedbeefu;                                                   \
+        _hj_i = _hj_j = 0x9e3779b9u;                                           \
+        _hj_k = (unsigned)(keylen);                                            \
+        while (_hj_k >= 12U) /* consume the key 12 bytes at a time */          \
+        {                                                                      \
+            _hj_i +=                                                           \
+                (_hj_key[0] + ((unsigned)_hj_key[1] << 8) +                    \
+                 ((unsigned)_hj_key[2] << 16) + ((unsigned)_hj_key[3] << 24)); \
+            _hj_j +=                                                           \
+                (_hj_key[4] + ((unsigned)_hj_key[5] << 8) +                    \
+                 ((unsigned)_hj_key[6] << 16) + ((unsigned)_hj_key[7] << 24)); \
+            hashv += (_hj_key[8] + ((unsigned)_hj_key[9] << 8) +               \
+                      ((unsigned)_hj_key[10] << 16) +                          \
+                      ((unsigned)_hj_key[11] << 24));                          \
+            /* stir the three accumulators after every 12-byte block */        \
+            HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                 \
+                                                                               \
+            _hj_key += 12;                                                     \
+            _hj_k -= 12U;                                                      \
+        }                                                                      \
+        hashv += (unsigned)(keylen);                                           \
+        switch (_hj_k) /* fold in the trailing 0..11 key bytes */              \
+        {                                                                      \
+        case 11:                                                               \
+            hashv += ((unsigned)_hj_key[10] << 24); /* FALLTHROUGH */          \
+        case 10:                                                               \
+            hashv += ((unsigned)_hj_key[9] << 16); /* FALLTHROUGH */           \
+        case 9:                                                                \
+            hashv += ((unsigned)_hj_key[8] << 8); /* FALLTHROUGH */            \
+        case 8:                                                                \
+            _hj_j += ((unsigned)_hj_key[7] << 24); /* FALLTHROUGH */           \
+        case 7:                                                                \
+            _hj_j += ((unsigned)_hj_key[6] << 16); /* FALLTHROUGH */           \
+        case 6:                                                                \
+            _hj_j += ((unsigned)_hj_key[5] << 8); /* FALLTHROUGH */            \
+        case 5:                                                                \
+            _hj_j += _hj_key[4]; /* FALLTHROUGH */                             \
+        case 4:                                                                \
+            _hj_i += ((unsigned)_hj_key[3] << 24); /* FALLTHROUGH */           \
+        case 3:                                                                \
+            _hj_i += ((unsigned)_hj_key[2] << 16); /* FALLTHROUGH */           \
+        case 2:                                                                \
+            _hj_i += ((unsigned)_hj_key[1] << 8); /* FALLTHROUGH */            \
+        case 1:                                                                \
+            _hj_i += _hj_key[0]; /* FALLTHROUGH */                             \
+        default:;                                                              \
+        }                                                                      \
+        HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                     \
+    } while (0)
+
+/* The Paul Hsieh hash function. get16bits(d) yields the 16-bit value at d:
+ * a direct uint16_t load on the toolchains listed below, otherwise
+ * byte[0] + (byte[1] << 8) assembled from two single-byte reads. */
+#undef get16bits
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) ||        \
+    defined(_MSC_VER) || defined(__BORLANDC__) || defined(__TURBOC__)
+#define get16bits(d) (*((const uint16_t *)(d)))
+#else
+#define get16bits(d)                                                           \
+    ((uint32_t)(((const uint8_t *)(d))[0]) +                                   \
+     (((uint32_t)(((const uint8_t *)(d))[1])) << 8))
+#endif
+#define HASH_SFH(key, keylen, hashv)                                           \
+    do                                                                         \
+    {                                                                          \
+        const unsigned char *_sfh_key = (const unsigned char *)(key);          \
+        uint32_t _sfh_tmp, _sfh_len = (uint32_t)(keylen);                      \
+        /* split keylen into whole 4-byte groups and a 0..3 byte tail */       \
+        unsigned _sfh_rem = _sfh_len & 3U;                                     \
+        _sfh_len >>= 2;                                                        \
+        hashv = 0xcafebabeu;                                                   \
+                                                                               \
+        /* Main loop: consumes two 16-bit halves per pass */                   \
+        for (; _sfh_len > 0U; _sfh_len--)                                      \
+        {                                                                      \
+            hashv += get16bits(_sfh_key);                                      \
+            _sfh_tmp = ((uint32_t)(get16bits(_sfh_key + 2)) << 11) ^ hashv;    \
+            hashv = (hashv << 16) ^ _sfh_tmp;                                  \
+            _sfh_key += 2U * sizeof(uint16_t);                                 \
+            hashv += hashv >> 11;                                              \
+        }                                                                      \
+                                                                               \
+        /* Handle end cases: the 1-3 bytes left over */                        \
+        switch (_sfh_rem)                                                      \
+        {                                                                      \
+        case 3:                                                                \
+            hashv += get16bits(_sfh_key);                                      \
+            hashv ^= hashv << 16;                                              \
+            hashv ^= (uint32_t)(_sfh_key[sizeof(uint16_t)]) << 18;             \
+            hashv += hashv >> 11;                                              \
+            break;                                                             \
+        case 2:                                                                \
+            hashv += get16bits(_sfh_key);                                      \
+            hashv ^= hashv << 11;                                              \
+            hashv += hashv >> 17;                                              \
+            break;                                                             \
+        case 1:                                                                \
+            hashv += *_sfh_key;                                                \
+            hashv ^= hashv << 10;                                              \
+            hashv += hashv >> 1;                                               \
+            break;                                                             \
+        default:;                                                              \
+        }                                                                      \
+                                                                               \
+        /* Force "avalanching" of final 127 bits */                            \
+        hashv ^= hashv << 3;                                                   \
+        hashv += hashv >> 5;                                                   \
+        hashv ^= hashv << 4;                                                   \
+        hashv += hashv >> 17;                                                  \
+        hashv ^= hashv << 25;                                                  \
+        hashv += hashv >> 6;                                                   \
+    } while (0)
+
+/* Scan one bucket's chain: out = item matching hashval and key, or NULL. */
+#define HASH_FIND_IN_BKT(tbl, hh, head, keyptr, keylen_in, hashval, out)       \
+    do                                                                         \
+    {                                                                          \
+        if ((head).hh_head != NULL)                                            \
+        {                                                                      \
+            DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head));           \
+        }                                                                      \
+        else                                                                   \
+        {                                                                      \
+            (out) = NULL;                                                      \
+        }                                                                      \
+        while ((out) != NULL)                                                  \
+        {                                                                      \
+            if ((out)->hh.hashv == (hashval) &&                                \
+                (out)->hh.keylen == (keylen_in))                               \
+            { /* hash and length agree: now run HASH_KEYCMP */                 \
+                if (HASH_KEYCMP((out)->hh.key, keyptr, keylen_in) == 0)        \
+                {                                                              \
+                    break; /* match: leave the loop with out set */            \
+                }                                                              \
+            }                                                                  \
+            if ((out)->hh.hh_next != NULL) /* advance along the chain */       \
+            {                                                                  \
+                DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next));    \
+            }                                                                  \
+            else                                                               \
+            {                                                                  \
+                (out) = NULL;                                                  \
+            }                                                                  \
+        }                                                                      \
+    } while (0)
+
+/* Push addhh onto the front of bucket head; may trigger bucket expansion. */
+#define HASH_ADD_TO_BKT(head, hh, addhh, oomed)                                \
+    do                                                                         \
+    {                                                                          \
+        UT_hash_bucket *_ha_head = &(head);                                    \
+        _ha_head->count++;                                                     \
+        (addhh)->hh_next = _ha_head->hh_head; /* new item becomes the head */  \
+        (addhh)->hh_prev = NULL;                                               \
+        if (_ha_head->hh_head != NULL)                                         \
+        {                                                                      \
+            _ha_head->hh_head->hh_prev = (addhh);                              \
+        }                                                                      \
+        _ha_head->hh_head = (addhh);                                           \
+        if ((_ha_head->count >=                                                \
+             ((_ha_head->expand_mult + 1U) * HASH_BKT_CAPACITY_THRESH)) &&     \
+            !(addhh)->tbl->noexpand)                                           \
+        { /* chain reached its limit and expansion is allowed */               \
+            HASH_EXPAND_BUCKETS(addhh, (addhh)->tbl, oomed);                   \
+            IF_HASH_NONFATAL_OOM(if (oomed) { HASH_DEL_IN_BKT(head, addhh); }) \
+        }                                                                      \
+    } while (0)
+
+/* Unlink delhh from bucket head's chain and decrement the bucket's count. */
+#define HASH_DEL_IN_BKT(head, delhh)                                           \
+    do                                                                         \
+    {                                                                          \
+        UT_hash_bucket *_hd_head = &(head);                                    \
+        _hd_head->count--;                                                     \
+        if (_hd_head->hh_head == (delhh)) /* delhh is first in the chain */    \
+        {                                                                      \
+            _hd_head->hh_head = (delhh)->hh_next;                              \
+        }                                                                      \
+        if ((delhh)->hh_prev) /* bypass delhh going forward */                 \
+        {                                                                      \
+            (delhh)->hh_prev->hh_next = (delhh)->hh_next;                      \
+        }                                                                      \
+        if ((delhh)->hh_next) /* bypass delhh going backward */                \
+        {                                                                      \
+            (delhh)->hh_next->hh_prev = (delhh)->hh_prev;                      \
+        }                                                                      \
+    } while (0)
+
+/* Bucket expansion has the effect of doubling the number of buckets
+ * and redistributing the items into the new buckets. Ideally the
+ * items will distribute more or less evenly into the new buckets
+ * (the extent to which this is true is a measure of the quality of
+ * the hash function as it applies to the key domain).
+ *
+ * With the items distributed into more buckets, the chain length
+ * (item count) in each bucket is reduced. Thus by expanding buckets
+ * the hash keeps a bound on the chain length. This bounded chain
+ * length is the essence of how a hash provides constant time lookup.
+ *
+ * The calculation of tbl->ideal_chain_maxlen below deserves some
+ * explanation. First, keep in mind that we're calculating the ideal
+ * maximum chain length based on the *new* (doubled) bucket count.
+ * In fractions this is just n/b (n=number of items,b=new num buckets).
+ * Since the ideal chain length is an integer, we want to calculate
+ * ceil(n/b). We don't depend on floating point arithmetic in this
+ * hash, so to calculate ceil(n/b) with integers we could write
+ *
+ *      ceil(n/b) = (n/b) + ((n%b)?1:0)
+ *
+ * and in fact a previous version of this hash did just that.
+ * But now we have improved things a bit by recognizing that b is
+ * always a power of two. We keep its base 2 log handy (call it lb),
+ * so now we can write this with a bit shift and logical AND:
+ *
+ *      ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
+ *
+ */
+#define HASH_EXPAND_BUCKETS(hh, tbl, oomed)                                    \
+    do                                                                         \
+    {                                                                          \
+        unsigned _he_bkt;                                                      \
+        unsigned _he_bkt_i;                                                    \
+        struct UT_hash_handle *_he_thh, *_he_hh_nxt;                           \
+        UT_hash_bucket *_he_new_buckets, *_he_newbkt;                          \
+        _he_new_buckets = (UT_hash_bucket *)uthash_malloc(                     \
+            sizeof(struct UT_hash_bucket) * (tbl)->num_buckets * 2U);          \
+        if (!_he_new_buckets) /* allocation failed */                          \
+        {                                                                      \
+            HASH_RECORD_OOM(oomed);                                            \
+        }                                                                      \
+        else                                                                   \
+        {                                                                      \
+            uthash_bzero(_he_new_buckets, sizeof(struct UT_hash_bucket) *      \
+                                              (tbl)->num_buckets * 2U);        \
+            (tbl)->ideal_chain_maxlen =                                        \
+                ((tbl)->num_items >> ((tbl)->log2_num_buckets + 1U)) +         \
+                ((((tbl)->num_items & (((tbl)->num_buckets * 2U) - 1U)) != 0U) \
+                     ? 1U                                                      \
+                     : 0U);                                                    \
+            (tbl)->nonideal_items = 0;                                         \
+            for (_he_bkt_i = 0; _he_bkt_i < (tbl)->num_buckets; _he_bkt_i++)   \
+            { /* re-home every item of old bucket _he_bkt_i */                 \
+                _he_thh = (tbl)->buckets[_he_bkt_i].hh_head;                   \
+                while (_he_thh != NULL)                                        \
+                {                                                              \
+                    _he_hh_nxt = _he_thh->hh_next;                             \
+                    HASH_TO_BKT(_he_thh->hashv, (tbl)->num_buckets * 2U,       \
+                                _he_bkt);                                      \
+                    _he_newbkt = &(_he_new_buckets[_he_bkt]);                  \
+                    if (++(_he_newbkt->count) > (tbl)->ideal_chain_maxlen)     \
+                    { /* chain is longer than ideal */                         \
+                        (tbl)->nonideal_items++;                               \
+                        if (_he_newbkt->count > _he_newbkt->expand_mult *      \
+                                                    (tbl)->ideal_chain_maxlen) \
+                        {                                                      \
+                            _he_newbkt->expand_mult++;                         \
+                        }                                                      \
+                    }                                                          \
+                    _he_thh->hh_prev = NULL; /* push onto new chain head */    \
+                    _he_thh->hh_next = _he_newbkt->hh_head;                    \
+                    if (_he_newbkt->hh_head != NULL)                           \
+                    {                                                          \
+                        _he_newbkt->hh_head->hh_prev = _he_thh;                \
+                    }                                                          \
+                    _he_newbkt->hh_head = _he_thh;                             \
+                    _he_thh = _he_hh_nxt;                                      \
+                }                                                              \
+            }                                                                  \
+            uthash_free((tbl)->buckets,                                        \
+                        (tbl)->num_buckets * sizeof(struct UT_hash_bucket));   \
+            (tbl)->num_buckets *= 2U;                                          \
+            (tbl)->log2_num_buckets++;                                         \
+            (tbl)->buckets = _he_new_buckets;                                  \
+            (tbl)->ineff_expands =                                             \
+                ((tbl)->nonideal_items > ((tbl)->num_items >> 1))              \
+                    ? ((tbl)->ineff_expands + 1U)                              \
+                    : 0U;                                                      \
+            if ((tbl)->ineff_expands > 1U) /* two poor expansions in a row */  \
+            {                                                                  \
+                (tbl)->noexpand = 1;                                           \
+                uthash_noexpand_fyi(tbl);                                      \
+            }                                                                  \
+            uthash_expand_fyi(tbl);                                            \
+        }                                                                      \
+    } while (0)
+
+/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
+/* HASH_SORT is shorthand for HASH_SRT with the hash handle field named hh;
+ * HASH_SRT was added to allow the hash handle name to be passed in. */
+#define HASH_SORT(head, cmpfcn) HASH_SRT(hh, head, cmpfcn)
+#define HASH_SRT(hh, head, cmpfcn)                                             \
+    do                                                                         \
+    {                                                                          \
+        unsigned _hs_i;                                                        \
+        unsigned _hs_looping, _hs_nmerges, _hs_insize, _hs_psize, _hs_qsize;   \
+        struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail;    \
+        if (head != NULL)                                                      \
+        {                                                                      \
+            _hs_insize = 1;                                                    \
+            _hs_looping = 1;                                                   \
+            _hs_list = &((head)->hh);                                          \
+            while (_hs_looping != 0U)                                          \
+            {                                                                  \
+                _hs_p = _hs_list;                                              \
+                _hs_list = NULL;                                               \
+                _hs_tail = NULL;                                               \
+                _hs_nmerges = 0;                                               \
+                while (_hs_p != NULL)                                          \
+                {                                                              \
+                    _hs_nmerges++;                                             \
+                    _hs_q = _hs_p;                                             \
+                    _hs_psize = 0;                                             \
+                    for (_hs_i = 0; _hs_i < _hs_insize; ++_hs_i)               \
+                    {                                                          \
+                        _hs_psize++;                                           \
+                        _hs_q =                                                \
+                            ((_hs_q->next != NULL)                             \
+                                 ? HH_FROM_ELMT((head)->hh.tbl, _hs_q->next)   \
+                                 : NULL);                                      \
+                        if (_hs_q == NULL)                                     \
+                        {                                                      \
+                            break;                                             \
+                        }                                                      \
+                    }                                                          \
+                    _hs_qsize = _hs_insize;                                    \
+                    while ((_hs_psize != 0U) ||                                \
+                           ((_hs_qsize != 0U) && (_hs_q != NULL)))             \
+                    {                                                          \
+                        if (_hs_psize == 0U)                                   \
+                        {                                                      \
+                            _hs_e = _hs_q;                                     \
+                            _hs_q = ((_hs_q->next != NULL)                     \
+                                         ? HH_FROM_ELMT((head)->hh.tbl,        \
+                                                        _hs_q->next)           \
+                                         : NULL);                              \
+                            _hs_qsize--;                                       \
+                        }                                                      \
+                        else if ((_hs_qsize == 0U) || (_hs_q == NULL))         \
+                        {                                                      \
+                            _hs_e = _hs_p;                                     \
+                            if (_hs_p != NULL)                                 \
+                            {                                                  \
+                                _hs_p = ((_hs_p->next != NULL)                 \
+                                             ? HH_FROM_ELMT((head)->hh.tbl,    \
+                                                            _hs_p->next)       \
+                                             : NULL);                          \
+                            }                                                  \
+                            _hs_psize--;                                       \
+                        }                                                      \
+                        else if ((cmpfcn(DECLTYPE(head)(ELMT_FROM_HH(          \
+                                             (head)->hh.tbl, _hs_p)),          \
+                                         DECLTYPE(head)(ELMT_FROM_HH(          \
+                                             (head)->hh.tbl, _hs_q)))) <= 0)   \
+                        {                                                      \
+                            _hs_e = _hs_p;                                     \
+                            if (_hs_p != NULL)                                 \
+                            {                                                  \
+                                _hs_p = ((_hs_p->next != NULL)                 \
+                                             ? HH_FROM_ELMT((head)->hh.tbl,    \
+                                                            _hs_p->next)       \
+                                             : NULL);                          \
+                            }                                                  \
+                            _hs_psize--;                                       \
+                        }                                                      \
+                        else                                                   \
+                        {                                                      \
+                            _hs_e = _hs_q;                                     \
+                            _hs_q = ((_hs_q->next != NULL)                     \
+                                         ? HH_FROM_ELMT((head)->hh.tbl,        \
+                                                        _hs_q->next)           \
+                                         : NULL);                              \
+                            _hs_qsize--;                                       \
+                        }                                                      \
+                        if (_hs_tail != NULL)                                  \
+                        {                                                      \
+                            _hs_tail->next =                                   \
+                                ((_hs_e != NULL)                               \
+                                     ? ELMT_FROM_HH((head)->hh.tbl, _hs_e)     \
+                                     : NULL);                                  \
+                        }                                                      \
+                        else                                                   \
+                        {                                                      \
+                            _hs_list = _hs_e;                                  \
+                        }                                                      \
+                        if (_hs_e != NULL)                                     \
+                        {                                                      \
+                            _hs_e->prev =                                      \
+                                ((_hs_tail != NULL)                            \
+                                     ? ELMT_FROM_HH((head)->hh.tbl, _hs_tail)  \
+                                     : NULL);                                  \
+                        }                                                      \
+                        _hs_tail = _hs_e;                                      \
+                    }                                                          \
+                    _hs_p = _hs_q;                                             \
+                }                                                              \
+                if (_hs_tail != NULL)                                          \
+                {                                                              \
+                    _hs_tail->next = NULL;                                     \
+                }                                                              \
+                if (_hs_nmerges <= 1U)                                         \
+                {                                                              \
+                    _hs_looping = 0;                                           \
+                    (head)->hh.tbl->tail = _hs_tail;                           \
+                    DECLTYPE_ASSIGN(head,                                      \
+                                    ELMT_FROM_HH((head)->hh.tbl, _hs_list));   \
+                }                                                              \
+                _hs_insize *= 2U;                                              \
+            }                                                                  \
+            HASH_FSCK(hh, head, "HASH_SRT");                                   \
+        }                                                                      \
+    } while (0)
+
+/* This function selects items from one hash into another hash.
+ * The end result is that the selected items have dual presence
+ * in both hashes. There is no copy of the items made; rather
+ * they are added into the new hash through a secondary
+ * hash handle that must be present in the structure. */
+#define HASH_SELECT(hh_dst, dst, hh_src, src, cond)                            \
+    do                                                                         \
+    {                                                                          \
+        unsigned _src_bkt, _dst_bkt;                                           \
+        void *_last_elt = NULL, *_elt;                                         \
+        UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh = NULL;               \
+        ptrdiff_t _dst_hho = ((char *)(&(dst)->hh_dst) - (char *)(dst));       \
+        if ((src) != NULL)                                                     \
+        {                                                                      \
+            for (_src_bkt = 0; _src_bkt < (src)->hh_src.tbl->num_buckets;      \
+                 _src_bkt++)                                                   \
+            {                                                                  \
+                for (_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head;   \
+                     _src_hh != NULL; _src_hh = _src_hh->hh_next)              \
+                {                                                              \
+                    _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh);           \
+                    if (cond(_elt))                                            \
+                    {                                                          \
+                        IF_HASH_NONFATAL_OOM(int _hs_oomed = 0;)               \
+                        _dst_hh = (UT_hash_handle *)(void *)(((char *)_elt) +  \
+                                                             _dst_hho);        \
+                        _dst_hh->key = _src_hh->key;                           \
+                        _dst_hh->keylen = _src_hh->keylen;                     \
+                        _dst_hh->hashv = _src_hh->hashv;                       \
+                        _dst_hh->prev = _last_elt;                             \
+                        _dst_hh->next = NULL;                                  \
+                        if (_last_elt_hh != NULL)                              \
+                        {                                                      \
+                            _last_elt_hh->next = _elt;                         \
+                        }                                                      \
+                        if ((dst) == NULL)                                     \
+                        {                                                      \
+                            DECLTYPE_ASSIGN(dst, _elt);                        \
+                            HASH_MAKE_TABLE(hh_dst, dst, _hs_oomed);           \
+                            IF_HASH_NONFATAL_OOM(if (_hs_oomed) {              \
+                                uthash_nonfatal_oom(_elt);                     \
+                                (dst) = NULL;                                  \
+                                continue;                                      \
+                            })                                                 \
+                        }                                                      \
+                        else                                                   \
+                        {                                                      \
+                            _dst_hh->tbl = (dst)->hh_dst.tbl;                  \
+                        }                                                      \
+                        HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, \
+                                    _dst_bkt);                                 \
+                        HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],       \
+                                        hh_dst, _dst_hh, _hs_oomed);           \
+                        (dst)->hh_dst.tbl->num_items++;                        \
+                        IF_HASH_NONFATAL_OOM(if (_hs_oomed) {                  \
+                            HASH_ROLLBACK_BKT(hh_dst, dst, _dst_hh);           \
+                            HASH_DELETE_HH(hh_dst, dst, _dst_hh);              \
+                            _dst_hh->tbl = NULL;                               \
+                            uthash_nonfatal_oom(_elt);                         \
+                            continue;                                          \
+                        })                                                     \
+                        HASH_BLOOM_ADD(_dst_hh->tbl, _dst_hh->hashv);          \
+                        _last_elt = _elt;                                      \
+                        _last_elt_hh = _dst_hh;                                \
+                    }                                                          \
+                }                                                              \
+            }                                                                  \
+        }                                                                      \
+        HASH_FSCK(hh_dst, dst, "HASH_SELECT");                                 \
+    } while (0)
+
+#define HASH_CLEAR(hh, head) /* drop the table; elements are not freed */      \
+    do                                                                         \
+    {                                                                          \
+        if ((head) != NULL)                                                    \
+        {                                                                      \
+            HASH_BLOOM_FREE((head)->hh.tbl);                                   \
+            uthash_free((head)->hh.tbl->buckets,                               \
+                        (head)->hh.tbl->num_buckets *                          \
+                            sizeof(struct UT_hash_bucket));                    \
+            uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                \
+            (head) = NULL;                                                     \
+        }                                                                      \
+    } while (0)
+
+#define HASH_OVERHEAD(hh, head) /* bookkeeping bytes; 0 if head is NULL */     \
+    (((head) != NULL)                                                          \
+         ? ((size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) +    \
+                     ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) +  \
+                     sizeof(UT_hash_table) + (HASH_BLOOM_BYTELEN)))            \
+         : 0U)
+
+#ifdef NO_DECLTYPE /* HASH_ITER: tmp holds el's successor during the body */
+#define HASH_ITER(hh, head, el, tmp)                                           \
+    for (((el) = (head)),                                                      \
+         ((*(char **)(&(tmp))) =                                               \
+              (char *)(((head) != NULL) ? (head)->hh.next : NULL));            \
+         (el) != NULL; ((el) = (tmp)),                                         \
+                       ((*(char **)(&(tmp))) =                                 \
+                            (char *)(((tmp) != NULL) ? (tmp)->hh.next : NULL)))
+#else /* !NO_DECLTYPE */
+#define HASH_ITER(hh, head, el, tmp)                                           \
+    for (((el) = (head)),                                                      \
+         ((tmp) = DECLTYPE(el)(((head) != NULL) ? (head)->hh.next : NULL));    \
+         (el) != NULL;                                                         \
+         ((el) = (tmp)),                                                       \
+         ((tmp) = DECLTYPE(el)(((tmp) != NULL) ? (tmp)->hh.next : NULL)))
+#endif /* NO_DECLTYPE */
+
+/* obtain a count of items in the hash (0 when head is NULL) */
+#define HASH_COUNT(head) HASH_CNT(hh, head)
+#define HASH_CNT(hh, head) (((head) != NULL) ? ((head)->hh.tbl->num_items) : 0U)
+
+typedef struct UT_hash_bucket
+{
+    struct UT_hash_handle *hh_head; /* first hh in this bucket's chain */
+    unsigned count; /* presumably the chain length -- TODO confirm */
+
+    /* expand_mult is normally set to 0. In this situation, the max chain length
+     * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
+     * the bucket's chain exceeds this length, bucket expansion is triggered).
+     * However, setting expand_mult to a non-zero value delays bucket expansion
+     * (that would be triggered by additions to this particular bucket)
+     * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
+     * (The multiplier is simply expand_mult+1). The whole idea of this
+     * multiplier is to reduce bucket expansions, since they are expensive, in
+     * situations where we know that a particular bucket tends to be overused.
+     * It is better to let its chain length grow to a longer yet-still-bounded
+     * value, than to do an O(n) bucket expansion too often.
+     */
+    unsigned expand_mult;
+
+} UT_hash_bucket;
+
+/* random signature used only to find hash tables in external analysis */
+#define HASH_SIGNATURE 0xa0111fe1u
+#define HASH_BLOOM_SIGNATURE 0xb12220f2u
+
+typedef struct UT_hash_table
+{
+    UT_hash_bucket *buckets; /* array of num_buckets buckets */
+    unsigned num_buckets, log2_num_buckets;
+    unsigned num_items; /* item count, as reported by HASH_CNT */
+    struct UT_hash_handle *tail; /* tail hh in app order, for fast append    */
+    ptrdiff_t hho; /* hash handle offset: byte pos of the hh in its element */
+
+    /* in an ideal situation (all buckets used equally), no bucket would have
+     * more than ceil(#items/#buckets) items. that's the ideal chain length. */
+    unsigned ideal_chain_maxlen;
+
+    /* nonideal_items is the number of items in the hash whose chain position
+     * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
+     * hash distribution; reaching them in a chain traversal takes >ideal steps
+     */
+    unsigned nonideal_items;
+
+    /* ineffective expands occur when a bucket doubling was performed, but
+     * afterward, more than half the items in the hash had nonideal chain
+     * positions. If this happens on two consecutive expansions we inhibit any
+     * further expansion, as it's not helping; this happens when the hash
+     * function isn't a good fit for the key domain. When expansion is inhibited
+     * the hash will still work, albeit no longer in constant time. */
+    unsigned ineff_expands, noexpand;
+
+    uint32_t signature; /* used only to find hash tables in external analysis */
+#ifdef HASH_BLOOM
+    uint32_t
+        bloom_sig; /* used only to test bloom exists in external analysis */
+    uint8_t *bloom_bv;
+    uint8_t bloom_nbits;
+#endif
+
+} UT_hash_table;
+
+typedef struct UT_hash_handle
+{
+    struct UT_hash_table *tbl;      /* hash table this hh belongs to  */
+    void *prev;                     /* prev element in app order      */
+    void *next;                     /* next element in app order      */
+    struct UT_hash_handle *hh_prev; /* previous hh in bucket order    */
+    struct UT_hash_handle *hh_next; /* next hh in bucket order        */
+    const void *key;                /* ptr to enclosing struct's key  */
+    unsigned keylen;                /* enclosing struct's key len     */
+    unsigned hashv;                 /* result of hash-fcn(key)        */
+} UT_hash_handle;
+
+#endif /* UTHASH_H */
\ No newline at end of file
diff --git a/systrace/thirdparty/x86_64/libunwind/libunwind-common.h b/systrace/thirdparty/x86_64/libunwind/libunwind-common.h
new file mode 100644
index 0000000000000000000000000000000000000000..9c0db22b11df3075b718e08ceb5c89f6d4df57b0
--- /dev/null
+++ b/systrace/thirdparty/x86_64/libunwind/libunwind-common.h
@@ -0,0 +1,335 @@
+/* libunwind - a platform-independent unwind library
+   Copyright (C) 2001-2004 Hewlett-Packard Co
+    Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+
+This file is part of libunwind.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#define UNW_VERSION_MAJOR 1
+#define UNW_VERSION_MINOR 9
+#define UNW_VERSION_EXTRA -pre
+
+#define UNW_VERSION_CODE(maj, min) (((maj) << 16) | (min))
+#define UNW_VERSION UNW_VERSION_CODE(UNW_VERSION_MAJOR, UNW_VERSION_MINOR)
+
+#ifdef __sun
+// On SmartOS, gcc fails with the following error:
+//
+// ../include/libunwind-common.h:43:41: error: expected identifier or '(' before
+// numeric constant # define UNW_PREFIX UNW_PASTE(UNW_PASTE(_U,UNW_TARGET),_)
+//                                         ^
+//
+// workaround is to undefine _U explicitly.
+// see https://github.com/libunwind/libunwind/issues/118 for more details.
+//
+#undef _U
+#endif
+
+#define UNW_PASTE2(x, y) x##y
+#define UNW_PASTE(x, y) UNW_PASTE2(x, y)
+#define UNW_OBJ(fn) UNW_PASTE(UNW_PREFIX, fn)
+#define UNW_ARCH_OBJ(fn) UNW_PASTE(UNW_PASTE(UNW_PASTE(_U, UNW_TARGET), _), fn)
+
+#ifdef UNW_LOCAL_ONLY
+#define UNW_PREFIX UNW_PASTE(UNW_PASTE(_UL, UNW_TARGET), _)
+#else /* !UNW_LOCAL_ONLY */
+#define UNW_PREFIX UNW_PASTE(UNW_PASTE(_U, UNW_TARGET), _)
+#endif /* !UNW_LOCAL_ONLY */
+
+/* Error codes.  The unwind routines return the *negated* values of
+   these error codes on error and a non-negative value on success.  */
+typedef enum
+{
+    UNW_ESUCCESS = 0, /* no error */
+    UNW_EUNSPEC,      /* unspecified (general) error */
+    UNW_ENOMEM,       /* out of memory */
+    UNW_EBADREG,      /* bad register number */
+    UNW_EREADONLYREG, /* attempt to write read-only register */
+    UNW_ESTOPUNWIND,  /* stop unwinding */
+    UNW_EINVALIDIP,   /* invalid IP */
+    UNW_EBADFRAME,    /* bad frame */
+    UNW_EINVAL,       /* unsupported operation or bad value */
+    UNW_EBADVERSION,  /* unwind info has unsupported version */
+    UNW_ENOINFO       /* no unwind info found */
+} unw_error_t;
+
+/* The following enum defines the indices for a couple of
+   (pseudo-)registers which have the same meaning across all
+   platforms.  (RO) means read-only.  (RW) means read-write.  General
+   registers (aka "integer registers") are expected to start with
+   index 0.  The number of such registers is architecture-dependent.
+   The remaining indices can be used as an architecture sees fit.  The
+   last valid register index is given by UNW_REG_LAST.  */
+typedef enum
+{
+    UNW_REG_IP = UNW_TDEP_IP, /* (rw) instruction pointer (pc) */
+    UNW_REG_SP = UNW_TDEP_SP, /* (ro) stack pointer */
+    UNW_REG_EH = UNW_TDEP_EH, /* (rw) exception-handling reg base */
+    UNW_REG_LAST = UNW_TDEP_LAST_REG
+} unw_frame_regnum_t;
+
+/* Number of exception-handler argument registers: */
+#define UNW_NUM_EH_REGS UNW_TDEP_NUM_EH_REGS
+
+typedef enum
+{
+    UNW_CACHE_NONE,      /* no caching */
+    UNW_CACHE_GLOBAL,    /* shared global cache */
+    UNW_CACHE_PER_THREAD /* per-thread caching */
+} unw_caching_policy_t;
+
+typedef enum
+{
+    UNW_INIT_SIGNAL_FRAME = 1 /* We know this is a signal frame */
+} unw_init_local2_flags_t;
+
+typedef int unw_regnum_t;
+
+/* The unwind cursor starts at the youngest (most deeply nested) frame
+   and is used to track the frame state as the unwinder steps from
+   frame to frame.  It is safe to make (shallow) copies of variables
+   of this type.  */
+typedef struct unw_cursor
+{
+    unw_word_t opaque[UNW_TDEP_CURSOR_LEN];
+} unw_cursor_t;
+
+/* This type encapsulates the entire (preserved) machine-state.  */
+typedef unw_tdep_context_t unw_context_t;
+
+/* unw_getcontext() fills the unw_context_t pointed to by UC with the
+   machine state as it exists at the call-site.  For implementation
+   reasons, this needs to be a target-dependent macro.  It's easiest
+   to think of unw_getcontext() as being identical to getcontext(). */
+#define unw_getcontext(uc) unw_tdep_getcontext(uc)
+
+/* Return 1 if register number R is a floating-point register, zero
+   otherwise.
+   This routine is signal-safe.  */
+#define unw_is_fpreg(r) unw_tdep_is_fpreg(r)
+
+typedef unw_tdep_fpreg_t unw_fpreg_t;
+
+typedef struct unw_addr_space *unw_addr_space_t;
+
+/* Each target may define its own set of flags, but bits 0-15 are
+   reserved for general libunwind-use.  */
+#define UNW_PI_FLAG_FIRST_TDEP_BIT 16
+/* The information comes from a .debug_frame section.  */
+#define UNW_PI_FLAG_DEBUG_FRAME 32
+
+typedef struct unw_proc_info
+{
+    unw_word_t start_ip; /* first IP covered by this procedure */
+    unw_word_t end_ip;   /* first IP NOT covered by this procedure */
+#if defined(NEED_LAST_IP)
+    unw_word_t last_ip; /* first IP that could begin another procedure */
+#endif
+    unw_word_t lsda;    /* address of lang.-spec. data area (if any) */
+    unw_word_t handler; /* optional personality routine */
+    unw_word_t gp;      /* global-pointer value for this procedure */
+    unw_word_t flags;   /* misc. flags */
+
+    int format;                 /* unwind-info format (arch-specific) */
+    int unwind_info_size;       /* size of the information (if applicable) */
+    void *unwind_info;          /* unwind-info (arch-specific) */
+    unw_tdep_proc_info_t extra; /* target-dependent auxiliary proc-info */
+} unw_proc_info_t;
+
+typedef int (*unw_reg_states_callback)(void *token, void *reg_states_data,
+                                       size_t reg_states_data_size,
+                                       unw_word_t start_ip, unw_word_t end_ip);
+
+/* These are backend callback routines that provide access to the
+   state of a "remote" process.  This can be used, for example, to
+   unwind another process through the ptrace() interface.  */
+typedef struct unw_accessors
+{
+    /* Look up the unwind info associated with instruction-pointer IP.
+       On success, the routine fills in the PROC_INFO structure.  */
+    int (*find_proc_info)(unw_addr_space_t, unw_word_t, unw_proc_info_t *, int,
+                          void *);
+
+    /* Release any resources (e.g., memory) that were allocated for
+       the unwind info returned by a previous call to
+       find_proc_info() with NEED_UNWIND_INFO set to 1.  */
+    void (*put_unwind_info)(unw_addr_space_t, unw_proc_info_t *, void *);
+
+    /* Return the list-head of the dynamically registered unwind
+       info.  */
+    int (*get_dyn_info_list_addr)(unw_addr_space_t, unw_word_t *, void *);
+
+    /* Access aligned word at address ADDR.  The value is returned
+       according to the endianness of the host (e.g., if the host is
+       little-endian and the target is big-endian, access_mem() needs
+       to byte-swap the value before returning it).  */
+    int (*access_mem)(unw_addr_space_t, unw_word_t, unw_word_t *, int, void *);
+
+    /* Access register number REG.  */
+    int (*access_reg)(unw_addr_space_t, unw_regnum_t, unw_word_t *, int,
+                      void *);
+
+    /* Access floating-point register number REG.  */
+    int (*access_fpreg)(unw_addr_space_t, unw_regnum_t, unw_fpreg_t *, int,
+                        void *);
+
+    int (*resume)(unw_addr_space_t, unw_cursor_t *, void *);
+
+    /* Optional call back to obtain the name of a (static) procedure.
+       Dynamically generated procedures are handled automatically by
+       libunwind.  This callback is optional and may be set to
+       NULL.  */
+    int (*get_proc_name)(unw_addr_space_t, unw_word_t, char *, size_t,
+                         unw_word_t *, void *);
+
+    /* Optional call back to obtain the name of the ELF file that the IP
+       belongs to.  This callback is optional and may be set to NULL.  */
+    int (*get_elf_filename)(unw_addr_space_t, unw_word_t, char *, size_t,
+                            unw_word_t *, void *);
+
+    /* Optional call back to obtain the start and end ip of a procedure.
+     * The procedure ip range is [start, end), i.e. end is exclusive.
+     * This callback is optional and may be set to NULL.
+     */
+    int (*get_proc_ip_range)(unw_addr_space_t, unw_word_t, unw_word_t *,
+                             unw_word_t *, void *);
+
+    /* Optional call back to return a mask to be used with pointer
+     * authentication on arm64.
+     *
+     * The on bits in the returned mask indicate which bits in a return address
+     * are part of a pointer authentication code.  These are the bits in the
+     * return address to turn off so that the calling frame can be found
+     * for the unwinding to continue.
+     *
+     * The return value must be host-endian.  e.g. if the target is big-endian
+     * and the host is little endian, the implementation of this function
+     * must byte swap.
+     *
+     * This callback is optional and may be set to NULL.  In this case all
+     * the bits in the return address are used, as if no masking were done.
+     */
+    unw_word_t (*ptrauth_insn_mask)(unw_addr_space_t, void *);
+
+} unw_accessors_t;
+
+typedef enum unw_save_loc_type
+{
+    UNW_SLT_NONE,   /* register is not saved ("not an l-value") */
+    UNW_SLT_MEMORY, /* register has been saved in memory */
+    UNW_SLT_REG     /* register has been saved in (another) register */
+} unw_save_loc_type_t;
+
+typedef struct unw_save_loc
+{
+    unw_save_loc_type_t type;
+    union
+    {
+        unw_word_t addr;     /* valid if type==UNW_SLT_MEMORY */
+        unw_regnum_t regnum; /* valid if type==UNW_SLT_REG */
+    } u;
+    unw_tdep_save_loc_t extra; /* target-dependent additional information */
+} unw_save_loc_t;
+
+struct dl_phdr_info;
+typedef int (*unw_iterate_phdr_callback_t)(struct dl_phdr_info *, size_t,
+                                           void *);
+typedef int (*unw_iterate_phdr_func_t)(unw_iterate_phdr_callback_t, void *);
+
+/* These routines work both for local and remote unwinding.  */
+
+#define unw_local_addr_space UNW_OBJ(local_addr_space)
+#define unw_create_addr_space UNW_OBJ(create_addr_space)
+#define unw_destroy_addr_space UNW_OBJ(destroy_addr_space)
+#define unw_get_accessors UNW_ARCH_OBJ(get_accessors)
+#define unw_get_accessors_int UNW_ARCH_OBJ(get_accessors_int)
+#define unw_init_local UNW_OBJ(init_local)
+#define unw_init_local2 UNW_OBJ(init_local2)
+#define unw_init_remote UNW_OBJ(init_remote)
+#define unw_step UNW_OBJ(step)
+#define unw_resume UNW_OBJ(resume)
+#define unw_get_proc_info UNW_OBJ(get_proc_info)
+#define unw_get_proc_info_by_ip UNW_OBJ(get_proc_info_by_ip)
+#define unw_get_proc_info_in_range UNW_OBJ(get_proc_info_in_range)
+#define unw_reg_states_iterate UNW_OBJ(reg_states_iterate)
+#define unw_apply_reg_state UNW_OBJ(apply_reg_state)
+#define unw_get_reg UNW_OBJ(get_reg)
+#define unw_set_reg UNW_OBJ(set_reg)
+#define unw_get_fpreg UNW_OBJ(get_fpreg)
+#define unw_set_fpreg UNW_OBJ(set_fpreg)
+#define unw_get_save_loc UNW_OBJ(get_save_loc)
+#define unw_is_signal_frame UNW_OBJ(is_signal_frame)
+#define unw_is_plt_entry UNW_OBJ(is_plt_entry)
+#define unw_get_proc_name UNW_OBJ(get_proc_name)
+#define unw_get_proc_name_by_ip UNW_OBJ(get_proc_name_by_ip)
+#define unw_get_elf_filename UNW_OBJ(get_elf_filename)
+#define unw_get_elf_filename_by_ip UNW_OBJ(get_elf_filename_by_ip)
+#define unw_set_caching_policy UNW_OBJ(set_caching_policy)
+#define unw_set_cache_size UNW_OBJ(set_cache_size)
+#define unw_set_iterate_phdr_function UNW_OBJ(set_iterate_phdr_function)
+#define unw_regname UNW_ARCH_OBJ(regname)
+#define unw_flush_cache UNW_ARCH_OBJ(flush_cache)
+#define unw_strerror UNW_ARCH_OBJ(strerror)
+
+extern unw_addr_space_t unw_create_addr_space(unw_accessors_t *, int);
+extern void unw_destroy_addr_space(unw_addr_space_t);
+extern unw_accessors_t *unw_get_accessors(unw_addr_space_t);
+extern unw_accessors_t *unw_get_accessors_int(unw_addr_space_t);
+extern void unw_flush_cache(unw_addr_space_t, unw_word_t, unw_word_t);
+extern int unw_set_caching_policy(unw_addr_space_t, unw_caching_policy_t);
+extern int unw_set_cache_size(unw_addr_space_t, size_t, int);
+extern void unw_set_iterate_phdr_function(unw_addr_space_t,
+                                          unw_iterate_phdr_func_t);
+extern const char *unw_regname(unw_regnum_t);
+
+extern int unw_init_local(unw_cursor_t *, unw_context_t *);
+extern int unw_init_local2(unw_cursor_t *, unw_context_t *, int);
+extern int unw_init_remote(unw_cursor_t *, unw_addr_space_t, void *);
+extern int unw_step(unw_cursor_t *);
+extern int unw_resume(unw_cursor_t *);
+extern int unw_get_proc_info(unw_cursor_t *, unw_proc_info_t *);
+extern int unw_get_proc_info_by_ip(unw_addr_space_t, unw_word_t,
+                                   unw_proc_info_t *, void *);
+extern int unw_get_proc_info_in_range(unw_word_t, unw_word_t, unw_word_t,
+                                      unw_word_t, unw_word_t, unw_word_t,
+                                      unw_addr_space_t, unw_word_t,
+                                      unw_proc_info_t *, int, void *);
+extern int unw_reg_states_iterate(unw_cursor_t *, unw_reg_states_callback,
+                                  void *);
+extern int unw_apply_reg_state(unw_cursor_t *, void *);
+extern int unw_get_reg(unw_cursor_t *, int, unw_word_t *);
+extern int unw_set_reg(unw_cursor_t *, int, unw_word_t);
+extern int unw_get_fpreg(unw_cursor_t *, int, unw_fpreg_t *);
+extern int unw_set_fpreg(unw_cursor_t *, int, unw_fpreg_t);
+extern int unw_get_save_loc(unw_cursor_t *, int, unw_save_loc_t *);
+extern int unw_is_signal_frame(unw_cursor_t *);
+extern int unw_is_plt_entry(unw_cursor_t *);
+extern int unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *);
+extern int unw_get_proc_name_by_ip(unw_addr_space_t, unw_word_t, char *, size_t,
+                                   unw_word_t *, void *);
+extern int unw_get_elf_filename(unw_cursor_t *, char *, size_t, unw_word_t *);
+extern int unw_get_elf_filename_by_ip(unw_addr_space_t, unw_word_t, char *,
+                                      size_t, unw_word_t *, void *);
+extern const char *unw_strerror(int);
+extern int unw_backtrace(void **, int);
+extern int unw_backtrace2(void **, int, unw_context_t *, int);
+
+extern unw_addr_space_t unw_local_addr_space;
diff --git a/systrace/thirdparty/x86_64/libunwind/libunwind-dynamic.h b/systrace/thirdparty/x86_64/libunwind/libunwind-dynamic.h
new file mode 100644
index 0000000000000000000000000000000000000000..13caf1633631ccc33d6c90ace394c539dd03f124
--- /dev/null
+++ b/systrace/thirdparty/x86_64/libunwind/libunwind-dynamic.h
@@ -0,0 +1,201 @@
+/* libunwind - a platform-independent unwind library
+   Copyright (C) 2002-2004 Hewlett-Packard Co
+        Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+
+This file is part of libunwind.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+/* This file defines the runtime-support routines for dynamically
+generated code.  Even though it is implemented as part of libunwind,
+it is logically separate from the interface to perform the actual
+unwinding.  In particular, this interface is always used in the
+context of the unwind target, whereas the rest of the unwind API is
+used in context of the process that is doing the unwind (which may be
+a debugger running on another machine, for example).
+
+Note that the data-structures declared here serve a dual purpose:
+when a program registers a dynamically generated procedure, it uses
+these structures directly.  On the other hand, with remote-unwinding,
+the data-structures are read from the remote process's memory and
+translated into internalized versions.  To facilitate remote-access,
+the following rules should be followed in declaring these structures:
+
+ (1) Declare a member as a pointer only if the information the
+     member points to needs to be internalized as well (e.g., a
+     string representing a procedure name should be declared as
+     "const char *", but the instruction pointer should be declared
+     as unw_word_t).
+
+ (2) Provide sufficient padding to ensure that no implicit padding
+     will be needed on any of the supported target architectures.  For
+     the time being, padding data structures with the assumption that
+     sizeof (unw_word_t) == 8 should be sufficient.  (Note: it's not
+     impossible to internalize structures with internal padding, but
+     it does make the process a bit harder).
+
+ (3) Don't declare members that contain bitfields or floating-point
+     values.
+
+ (4) Don't declare members with enumeration types.  Declare them as
+     int32_t instead.  */
+
+typedef enum
+{
+    UNW_DYN_STOP = 0,     /* end-of-unwind-info marker */
+    UNW_DYN_SAVE_REG,     /* save register to another register */
+    UNW_DYN_SPILL_FP_REL, /* frame-pointer-relative register spill */
+    UNW_DYN_SPILL_SP_REL, /* stack-pointer-relative register spill */
+    UNW_DYN_ADD,          /* add constant value to a register */
+    UNW_DYN_POP_FRAMES,   /* drop one or more stack frames */
+    UNW_DYN_LABEL_STATE,  /* name the current state */
+    UNW_DYN_COPY_STATE,   /* set the region's entry-state */
+    UNW_DYN_ALIAS         /* get unwind info from an alias */
+} unw_dyn_operation_t;
+
+typedef enum
+{
+    UNW_INFO_FORMAT_DYNAMIC,      /* unw_dyn_proc_info_t */
+    UNW_INFO_FORMAT_TABLE,        /* unw_dyn_table_t */
+    UNW_INFO_FORMAT_REMOTE_TABLE, /* unw_dyn_remote_table_t */
+    UNW_INFO_FORMAT_ARM_EXIDX,    /* ARM specific unwind info */
+    UNW_INFO_FORMAT_IP_OFFSET     /* Like UNW_INFO_FORMAT_REMOTE_TABLE, but
+                                     table entries are considered
+                                     relative to di->start_ip, rather
+                                     than di->segbase */
+} unw_dyn_info_format_t;
+
+typedef struct unw_dyn_op
+{
+    int8_t tag;     /* what operation? */
+    int8_t qp;      /* qualifying predicate register */
+    int16_t reg;    /* what register */
+    int32_t when;   /* when does it take effect? */
+    unw_word_t val; /* auxiliary value */
+} unw_dyn_op_t;
+
+typedef struct unw_dyn_region_info
+{
+    struct unw_dyn_region_info *next; /* linked list of regions */
+    int32_t insn_count;               /* region length (# of instructions) */
+    uint32_t op_count;                /* length of op-array */
+    unw_dyn_op_t op[1];               /* variable-length op-array */
+} unw_dyn_region_info_t;
+
+typedef struct unw_dyn_proc_info
+{
+    unw_word_t name_ptr; /* address of human-readable procedure name */
+    unw_word_t handler;  /* address of personality routine */
+    uint32_t flags;
+    int32_t pad0; /* explicit padding, so that no implicit padding is needed */
+    unw_dyn_region_info_t *regions; /* linked list of regions (via ->next) */
+} unw_dyn_proc_info_t;
+
+typedef struct unw_dyn_table_info
+{
+    unw_word_t name_ptr;  /* addr. of table name (e.g., library name) */
+    unw_word_t segbase;   /* segment base */
+    unw_word_t table_len; /* must be a multiple of sizeof(unw_word_t)! */
+    unw_word_t *table_data; /* pointer to the table contents */
+} unw_dyn_table_info_t;
+
+typedef struct unw_dyn_remote_table_info
+{
+    unw_word_t name_ptr;  /* addr. of table name (e.g., library name) */
+    unw_word_t segbase;   /* segment base */
+    unw_word_t table_len; /* must be a multiple of sizeof(unw_word_t)! */
+    unw_word_t table_data; /* table location held as a word, not a pointer */
+} unw_dyn_remote_table_info_t;
+
+typedef struct unw_dyn_info
+{
+    /* doubly-linked list of dyn-info structures: */
+    struct unw_dyn_info *next;
+    struct unw_dyn_info *prev;
+    unw_word_t start_ip; /* first IP covered by this entry */
+    unw_word_t end_ip;   /* first IP NOT covered by this entry */
+    unw_word_t gp;       /* global-pointer in effect for this entry */
+    int32_t format;      /* real type: unw_dyn_info_format_t */
+    int32_t pad;         /* explicit padding following the 32-bit format */
+    unw_word_t load_offset; /* ELF load offset */
+    union
+    {
+        unw_dyn_proc_info_t pi;          /* procedure info */
+        unw_dyn_table_info_t ti;         /* table info */
+        unw_dyn_remote_table_info_t rti; /* remote-table info */
+    } u; /* presumably `format' selects the valid member -- confirm */
+} unw_dyn_info_t;
+
+typedef struct unw_dyn_info_list
+{
+    uint32_t version;
+    uint32_t generation;
+    unw_dyn_info_t *first; /* presumably the first entry of the dyn-info list */
+} unw_dyn_info_list_t;
+
+/* Return the size (in bytes) of an unw_dyn_region_info_t structure that can
+   hold OP_COUNT ops.  */
+#define _U_dyn_region_info_size(op_count)                                      \
+    ((char *)(((unw_dyn_region_info_t *)NULL)->op + (op_count)) - (char *)NULL)
+
+/* Register the unwind info for a single procedure.
+   This routine is NOT signal-safe.  */
+extern void _U_dyn_register(unw_dyn_info_t *);
+
+/* Cancel the unwind info for a single procedure.
+   This routine is NOT signal-safe.  */
+extern void _U_dyn_cancel(unw_dyn_info_t *);
+
+/* Convenience routines.  */
+
+#define _U_dyn_op(_tag, _qp, _when, _reg, _val)                                \
+    ((unw_dyn_op_t){(_tag), (_qp), (_reg), (_when), (_val)})
+
+#define _U_dyn_op_save_reg(op, qp, when, reg, dst)                             \
+    (*(op) = _U_dyn_op(UNW_DYN_SAVE_REG, (qp), (when), (reg), (dst)))
+
+#define _U_dyn_op_spill_fp_rel(op, qp, when, reg, offset)                      \
+    (*(op) = _U_dyn_op(UNW_DYN_SPILL_FP_REL, (qp), (when), (reg), (offset)))
+
+#define _U_dyn_op_spill_sp_rel(op, qp, when, reg, offset)                      \
+    (*(op) = _U_dyn_op(UNW_DYN_SPILL_SP_REL, (qp), (when), (reg), (offset)))
+
+#define _U_dyn_op_add(op, qp, when, reg, value)                                \
+    (*(op) = _U_dyn_op(UNW_DYN_ADD, (qp), (when), (reg), (value)))
+
+#define _U_dyn_op_pop_frames(op, qp, when, num_frames)                         \
+    (*(op) = _U_dyn_op(UNW_DYN_POP_FRAMES, (qp), (when), 0, (num_frames)))
+
+#define _U_dyn_op_label_state(op, label)                                       \
+    (*(op) = _U_dyn_op(UNW_DYN_LABEL_STATE, _U_QP_TRUE, -1, 0, (label)))
+
+#define _U_dyn_op_copy_state(op, label)                                        \
+    (*(op) = _U_dyn_op(UNW_DYN_COPY_STATE, _U_QP_TRUE, -1, 0, (label)))
+
+#define _U_dyn_op_alias(op, qp, when, addr)                                    \
+    (*(op) = _U_dyn_op(UNW_DYN_ALIAS, (qp), (when), 0, (addr)))
+
+#define _U_dyn_op_stop(op)                                                     \
+    (*(op) = _U_dyn_op(UNW_DYN_STOP, _U_QP_TRUE, -1, 0, 0))
+
+/* The target-dependent qualifying predicate which is always TRUE.  On
+   IA-64, that's p0 (0), on non-predicated architectures, the value is
+   ignored.  */
+#define _U_QP_TRUE _U_TDEP_QP_TRUE
diff --git a/systrace/thirdparty/x86_64/libunwind/libunwind-x86_64.h b/systrace/thirdparty/x86_64/libunwind/libunwind-x86_64.h
new file mode 100644
index 0000000000000000000000000000000000000000..e9fc8177ed8618f5fdc85b588a6247c273186f28
--- /dev/null
+++ b/systrace/thirdparty/x86_64/libunwind/libunwind-x86_64.h
@@ -0,0 +1,146 @@
+/* libunwind - a platform-independent unwind library
+   Copyright (C) 2002-2004 Hewlett-Packard Co
+        Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+
+   Modified for x86_64 by Max Asbock <masbock@us.ibm.com>
+
+This file is part of libunwind.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#ifndef LIBUNWIND_H
+#define LIBUNWIND_H
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C"
+{
+#endif
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <ucontext.h>
+
+#ifndef UNW_EMPTY_STRUCT
+#define UNW_EMPTY_STRUCT uint8_t unused;
+#endif
+
+#define UNW_TARGET x86_64
+#define UNW_TARGET_X86_64 1
+
+#define _U_TDEP_QP_TRUE 0 /* see libunwind-dynamic.h  */
+
+/* This needs to be big enough to accommodate "struct cursor", while
+   leaving some slack for future expansion.  Changing this value will
+   require recompiling all users of this library.  Stack allocation is
+   relatively cheap and unwind-state copying is relatively rare, so we
+   want to err on making it rather too big than too small.  */
+#define UNW_TDEP_CURSOR_LEN 127
+
+    typedef uint64_t unw_word_t;
+    typedef int64_t unw_sword_t;
+
+    typedef long double unw_tdep_fpreg_t;
+
+#define UNW_WORD_MAX UINT64_MAX
+
+    typedef enum
+    {
+        UNW_X86_64_RAX,
+        UNW_X86_64_RDX,
+        UNW_X86_64_RCX,
+        UNW_X86_64_RBX,
+        UNW_X86_64_RSI,
+        UNW_X86_64_RDI,
+        UNW_X86_64_RBP,
+        UNW_X86_64_RSP,
+        UNW_X86_64_R8,
+        UNW_X86_64_R9,
+        UNW_X86_64_R10,
+        UNW_X86_64_R11,
+        UNW_X86_64_R12,
+        UNW_X86_64_R13,
+        UNW_X86_64_R14,
+        UNW_X86_64_R15,
+        UNW_X86_64_RIP,
+#ifdef CONFIG_MSABI_SUPPORT
+        UNW_X86_64_XMM0,
+        UNW_X86_64_XMM1,
+        UNW_X86_64_XMM2,
+        UNW_X86_64_XMM3,
+        UNW_X86_64_XMM4,
+        UNW_X86_64_XMM5,
+        UNW_X86_64_XMM6,
+        UNW_X86_64_XMM7,
+        UNW_X86_64_XMM8,
+        UNW_X86_64_XMM9,
+        UNW_X86_64_XMM10,
+        UNW_X86_64_XMM11,
+        UNW_X86_64_XMM12,
+        UNW_X86_64_XMM13,
+        UNW_X86_64_XMM14,
+        UNW_X86_64_XMM15,
+        UNW_TDEP_LAST_REG = UNW_X86_64_XMM15, /* XMM regs are enumerated */
+#else
+        UNW_TDEP_LAST_REG = UNW_X86_64_RIP, /* no XMM regs in this config */
+#endif
+
+        /* XXX Add other regs here */
+
+        /* frame info (read-only) */
+        UNW_X86_64_CFA,
+
+        UNW_TDEP_IP = UNW_X86_64_RIP,
+        UNW_TDEP_SP = UNW_X86_64_RSP,
+        UNW_TDEP_BP = UNW_X86_64_RBP,
+        UNW_TDEP_EH = UNW_X86_64_RAX
+    } x86_64_regnum_t;
+
+#define UNW_TDEP_NUM_EH_REGS 2 /* XXX Not sure what this means */
+
+    typedef struct unw_tdep_save_loc
+    {
+        /* Additional target-dependent info on a save location.  */
+        UNW_EMPTY_STRUCT
+    } unw_tdep_save_loc_t;
+
+    /* On x86_64, we can directly use ucontext_t as the unwind context.  */
+    typedef ucontext_t unw_tdep_context_t;
+
+    typedef struct
+    {
+        /* no x86-64-specific auxiliary proc-info */
+        UNW_EMPTY_STRUCT
+    } unw_tdep_proc_info_t;
+
+#include "libunwind-common.h"
+#include "libunwind-dynamic.h"
+
+#define unw_tdep_getcontext UNW_ARCH_OBJ(getcontext)
+#define unw_tdep_is_fpreg UNW_ARCH_OBJ(is_fpreg)
+
+    extern int unw_tdep_getcontext(unw_tdep_context_t *);
+    extern int unw_tdep_is_fpreg(int);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* LIBUNWIND_H */
diff --git a/systrace/thirdparty/x86_64/libunwind/libunwind.h b/systrace/thirdparty/x86_64/libunwind/libunwind.h
new file mode 100644
index 0000000000000000000000000000000000000000..db092c7bcddb366ad8c21359af108547322c9c78
--- /dev/null
+++ b/systrace/thirdparty/x86_64/libunwind/libunwind.h
@@ -0,0 +1,40 @@
+/* Provide a real file - not a symlink - as it would cause multiarch conflicts
+   when multiple different arch releases are installed simultaneously.  */
+
+#ifndef UNW_REMOTE_ONLY
+
+#if defined __aarch64__
+#include "libunwind-aarch64.h"
+#elif defined __arm__
+#include "libunwind-arm.h"
+#elif defined __hppa__
+#include "libunwind-hppa.h"
+#elif defined __ia64__
+#include "libunwind-ia64.h"
+#elif defined __mips__
+#include "libunwind-mips.h"
+#elif defined __powerpc__ && !defined __powerpc64__
+#include "libunwind-ppc32.h"
+#elif defined __powerpc64__
+#include "libunwind-ppc64.h"
+#elif defined __sh__
+#include "libunwind-sh.h"
+#elif defined __i386__
+#include "libunwind-x86.h"
+#elif defined __x86_64__
+#include "libunwind-x86_64.h"
+#elif defined __s390x__
+#include "libunwind-s390x.h"
+#elif defined __riscv || defined __riscv__
+#include "libunwind-riscv.h"
+#elif defined __loongarch64
+#include "libunwind-loongarch64.h"
+#else
+#error "Unsupported arch"
+#endif
+
+#else /* UNW_REMOTE_ONLY */
+
+#include "libunwind-x86_64.h"
+
+#endif /* UNW_REMOTE_ONLY */
diff --git a/systrace/thirdparty/x86_64/libunwind/unwind.h b/systrace/thirdparty/x86_64/libunwind/unwind.h
new file mode 100644
index 0000000000000000000000000000000000000000..69201dc8929eb8fcb5c63d059ca538c5fc4273a4
--- /dev/null
+++ b/systrace/thirdparty/x86_64/libunwind/unwind.h
@@ -0,0 +1,158 @@
+/* libunwind - a platform-independent unwind library
+   Copyright (C) 2003 Hewlett-Packard Co
+        Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+
+This file is part of libunwind.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#ifndef _UNWIND_H
+#define _UNWIND_H
+
+/* For alignas (stdalign.h) and uint64_t (stdint.h) */
+#include <stdalign.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+    /* Minimal interface as per C++ ABI draft standard:
+
+            http://www.codesourcery.com/cxx-abi/abi-eh.html */
+
+    typedef enum
+    {
+        _URC_NO_REASON = 0,
+        _URC_FOREIGN_EXCEPTION_CAUGHT = 1,
+        _URC_FATAL_PHASE2_ERROR = 2,
+        _URC_FATAL_PHASE1_ERROR = 3,
+        _URC_NORMAL_STOP = 4,
+        _URC_END_OF_STACK = 5,
+        _URC_HANDLER_FOUND = 6,
+        _URC_INSTALL_CONTEXT = 7,
+        _URC_CONTINUE_UNWIND = 8
+    } _Unwind_Reason_Code;
+
+    typedef int _Unwind_Action;
+
+#define _UA_SEARCH_PHASE 1
+#define _UA_CLEANUP_PHASE 2
+#define _UA_HANDLER_FRAME 4
+#define _UA_FORCE_UNWIND 8
+
+    struct _Unwind_Context;   /* opaque data-structure */
+    struct _Unwind_Exception; /* forward-declaration */
+
+    typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code,
+                                                 struct _Unwind_Exception *);
+
+    typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action,
+                                                   uint64_t,
+                                                   struct _Unwind_Exception *,
+                                                   struct _Unwind_Context *,
+                                                   void *);
+
+    /* The C++ ABI requires exception_class, private_1, and private_2 to
+       be of type uint64 and the entire structure to be
+       double-word-aligned. Please note that exception_class stays 64-bit
+       even on 32-bit machines for gcc compatibility.  */
+    struct _Unwind_Exception
+    {
+        alignas(8) uint64_t exception_class;
+        _Unwind_Exception_Cleanup_Fn exception_cleanup;
+        unsigned long private_1;
+        unsigned long private_2;
+    };
+
+    extern _Unwind_Reason_Code
+    _Unwind_RaiseException(struct _Unwind_Exception *);
+    extern _Unwind_Reason_Code _Unwind_ForcedUnwind(struct _Unwind_Exception *,
+                                                    _Unwind_Stop_Fn, void *);
+    extern void _Unwind_Resume(struct _Unwind_Exception *);
+    extern void _Unwind_DeleteException(struct _Unwind_Exception *);
+    extern unsigned long _Unwind_GetGR(struct _Unwind_Context *, int);
+    extern void _Unwind_SetGR(struct _Unwind_Context *, int, unsigned long);
+    extern unsigned long _Unwind_GetIP(struct _Unwind_Context *);
+    extern unsigned long _Unwind_GetIPInfo(struct _Unwind_Context *, int *);
+    extern void _Unwind_SetIP(struct _Unwind_Context *, unsigned long);
+    extern unsigned long
+    _Unwind_GetLanguageSpecificData(struct _Unwind_Context *);
+    extern unsigned long _Unwind_GetRegionStart(struct _Unwind_Context *);
+
+#ifdef _GNU_SOURCE
+
+    /* Callback for _Unwind_Backtrace().  The backtrace stops immediately
+       if the callback returns any value other than _URC_NO_REASON. */
+    typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,
+                                                    void *);
+
+/* See http://gcc.gnu.org/ml/gcc-patches/2001-09/msg00082.html for why
+   _UA_END_OF_STACK exists.  */
+#define _UA_END_OF_STACK 16
+
+    /* If the unwind was initiated due to a forced unwind, resume that
+       operation, else re-raise the exception.  This is used by
+       __cxa_rethrow().  */
+    extern _Unwind_Reason_Code
+    _Unwind_Resume_or_Rethrow(struct _Unwind_Exception *);
+
+    /* See http://gcc.gnu.org/ml/gcc-patches/2003-09/msg00154.html for why
+       _Unwind_GetBSP() exists.  */
+    extern unsigned long _Unwind_GetBSP(struct _Unwind_Context *);
+
+    /* Return the "canonical frame address" for the given context.
+       This is used by NPTL... */
+    extern unsigned long _Unwind_GetCFA(struct _Unwind_Context *);
+
+    /* Return the base-address for data references.  */
+    extern unsigned long _Unwind_GetDataRelBase(struct _Unwind_Context *);
+
+    /* Return the base-address for text references.  */
+    extern unsigned long _Unwind_GetTextRelBase(struct _Unwind_Context *);
+
+    /* Call _Unwind_Trace_Fn once for each stack-frame, without doing any
+       cleanup.  The first frame for which the callback is invoked is the
+       one for the caller of _Unwind_Backtrace().  _Unwind_Backtrace()
+       returns _URC_END_OF_STACK when the backtrace stopped due to
+       reaching the end of the call-chain or _URC_FATAL_PHASE1_ERROR if it
+       stops for any other reason.  */
+    extern _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *);
+
+    /* Find the start-address of the procedure containing the specified IP
+       or NULL if it cannot be found (e.g., because the function has no
+       unwind info).  Note: there is not necessarily a one-to-one
+       correspondence between source-level functions and procedures: some
+       functions don't have unwind-info and others are split into multiple
+       procedures.  */
+    extern void *_Unwind_FindEnclosingFunction(void *);
+
+    /* See also Linux Standard Base Spec:
+        http://www.linuxbase.org/spec/refspecs/LSB_1.3.0/gLSB/gLSB/libgcc-s.html
+     */
+
+#endif /* _GNU_SOURCE */
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* _UNWIND_H */
diff --git a/systrace/thirdparty/x86_64/mspti/include/mspti.h b/systrace/thirdparty/x86_64/mspti/include/mspti.h
new file mode 100644
index 0000000000000000000000000000000000000000..e83c454c11cb784c7a22f82f50127f2f9d2a368c
--- /dev/null
+++ b/systrace/thirdparty/x86_64/mspti/include/mspti.h
@@ -0,0 +1,19 @@
+/**
+ * @file mspti.h
+ *
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef MSPTI_H
+#define MSPTI_H
+
+#include "mspti_activity.h"
+#include "mspti_callback.h"
+#include "mspti_cbid.h"
+#include "mspti_result.h"
+
+#endif
diff --git a/systrace/thirdparty/x86_64/mspti/include/mspti_activity.h b/systrace/thirdparty/x86_64/mspti/include/mspti_activity.h
new file mode 100644
index 0000000000000000000000000000000000000000..30f71598d073b9637c9ec440939f30f65ef30e74
--- /dev/null
+++ b/systrace/thirdparty/x86_64/mspti/include/mspti_activity.h
@@ -0,0 +1,424 @@
+/**
+ * @file mspti_activity.h
+ *
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef MSPTI_ACTIVITY_H
+#define MSPTI_ACTIVITY_H
+
+#define ACTIVITY_STRUCT_ALIGNMENT 8
+#if defined(_WIN32)
+#define START_PACKED_ALIGNMENT __pragma(pack(push, 1))
+#define PACKED_ALIGNMENT __declspec(align(ACTIVITY_STRUCT_ALIGNMENT))
+#define END_PACKED_ALIGNMENT __pragma(pack(pop))
+#elif defined(__GNUC__)
+#define START_PACKED_ALIGNMENT
+#define PACKED_ALIGNMENT                                                       \
+    __attribute__((__packed__))                                                \
+    __attribute__((aligned(ACTIVITY_STRUCT_ALIGNMENT)))
+#define END_PACKED_ALIGNMENT
+#else
+#define START_PACKED_ALIGNMENT
+#define PACKED_ALIGNMENT
+#define END_PACKED_ALIGNMENT
+#endif
+
+#include "mspti_result.h"
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#if defined(__GNUC__) && defined(MSPTI_LIB)
+#pragma GCC visibility push(default)
+#endif
+
+    /**
+     * @brief The kinds of activity records.
+     *
+     * Each kind is associated with an
+     * activity record structure that holds the information associated
+     * with the kind.
+     */
+    typedef enum
+    {
+        /**
+         * The activity record is invalid.
+         */
+        MSPTI_ACTIVITY_KIND_INVALID = 0,
+        MSPTI_ACTIVITY_KIND_MARKER = 1, /* record type: msptiActivityMarker */
+        MSPTI_ACTIVITY_KIND_KERNEL = 2, /* record type: msptiActivityKernel */
+        MSPTI_ACTIVITY_KIND_API = 3,    /* record type: msptiActivityApi */
+        MSPTI_ACTIVITY_KIND_COUNT,      /* one past the last kind above (4) */
+        MSPTI_ACTIVITY_KIND_FORCE_INT = 0x7fffffff /* presumably pins the enum to int width */
+    } msptiActivityKind;
+
+    /**
+     * @brief The source kinds of mark data.
+     *
+     * Each mark activity record kind represents information about host or
+     * device
+     */
+    typedef enum
+    {
+        MSPTI_ACTIVITY_SOURCE_KIND_HOST = 0,
+        MSPTI_ACTIVITY_SOURCE_KIND_DEVICE = 1
+    } msptiActivitySourceKind;
+
+    /**
+     * @brief Flags linked to activity records.
+     *
+     * These are the Flags that pertain to activity records.
+     * Flags can be combined by bitwise OR to
+     * associate multiple flags with an activity record.
+     */
+    typedef enum
+    {
+        /**
+         * Signifies that the activity record lacks any flags.
+         */
+        MSPTI_ACTIVITY_FLAG_NONE = 0,
+        /**
+         * Represents the activity as a pure host instantaneous marker. Works
+         * with MSPTI_ACTIVITY_KIND_MARKER.
+         */
+        MSPTI_ACTIVITY_FLAG_MARKER_INSTANTANEOUS = 1 << 0,
+        /**
+         * Represents the activity as a pure host region start marker. Works
+         * with MSPTI_ACTIVITY_KIND_MARKER.
+         */
+        MSPTI_ACTIVITY_FLAG_MARKER_START = 1 << 1,
+        /**
+         * Represents the activity as a pure host region end marker. Works with
+         * MSPTI_ACTIVITY_KIND_MARKER.
+         */
+        MSPTI_ACTIVITY_FLAG_MARKER_END = 1 << 2,
+        /**
+         * Represents the activity as an instantaneous marker with device. Works
+         * with MSPTI_ACTIVITY_KIND_MARKER.
+         */
+        MSPTI_ACTIVITY_FLAG_MARKER_INSTANTANEOUS_WITH_DEVICE = 1 << 3,
+        /**
+         * Represents the activity as a pure start marker with device. Works
+         * with MSPTI_ACTIVITY_KIND_MARKER.
+         */
+        MSPTI_ACTIVITY_FLAG_MARKER_START_WITH_DEVICE = 1 << 4,
+        /**
+         * Represents the activity as a pure end marker with device. Works with
+         * MSPTI_ACTIVITY_KIND_MARKER.
+         */
+        MSPTI_ACTIVITY_FLAG_MARKER_END_WITH_DEVICE = 1 << 5
+    } msptiActivityFlag;
+
+    START_PACKED_ALIGNMENT
+
+    typedef struct PACKED_ALIGNMENT
+    {
+        msptiActivityKind kind;
+    } msptiActivity;
+
+    typedef union PACKED_ALIGNMENT
+    {
+        /**
+         * A thread object requires that we identify both the process and
+         * thread ID.
+         */
+        struct
+        {
+            uint32_t processId;
+            uint32_t threadId;
+        } pt;
+        /**
+         * A stream object requires that we identify device and stream ID.
+         */
+        struct
+        {
+            uint32_t deviceId;
+            uint32_t streamId;
+        } ds;
+    } msptiObjectId;
+
+    /**
+     * @brief This activity record serves as a marker, representing a specific
+     * moment in time.
+     *
+     * The marker is characterized by a distinctive name and a unique identifier
+     */
+    typedef struct PACKED_ALIGNMENT
+    {
+        /**
+         * The activity record kind, always MSPTI_ACTIVITY_KIND_MARKER.
+         */
+        msptiActivityKind kind;
+
+        /**
+         * The flags associated with the marker.
+         * @see msptiActivityFlag
+         */
+        msptiActivityFlag flag;
+
+        /**
+         * The source kinds of mark data.
+         * @see msptiActivitySourceKind
+         */
+        msptiActivitySourceKind sourceKind;
+
+        /**
+         * The timestamp for the marker, in ns. A value of 0 indicates that
+         * timestamp information could not be collected for the marker.
+         */
+        uint64_t timestamp;
+
+        /**
+         * The marker ID.
+         */
+        uint64_t id;
+
+        /**
+         * The identifier for the activity object associated with this
+         * marker. 'objectKind' indicates which ID is valid for this record.
+         */
+        msptiObjectId objectId;
+
+        /**
+         * The marker name for an instantaneous or start marker.
+         * This will be NULL for an end marker.
+         */
+        const char *name;
+
+        /**
+         * The name of the domain to which this marker belongs.
+         * This will be NULL for default domain.
+         */
+        const char *domain;
+    } msptiActivityMarker;
+
+    typedef struct PACKED_ALIGNMENT
+    {
+        /**
+         * The activity record kind, must be MSPTI_ACTIVITY_KIND_API.
+         */
+        msptiActivityKind kind;
+
+        /**
+         * The start timestamp for the api, in ns.
+         */
+        uint64_t start;
+
+        /**
+         * The end timestamp for the api, in ns.
+         */
+        uint64_t end;
+
+        /**
+         * A thread object requires that we identify both the process and
+         * thread ID.
+         */
+        struct
+        {
+            uint32_t processId;
+            uint32_t threadId;
+        } pt;
+
+        /**
+         * The correlation ID of the kernel.
+         */
+        uint64_t correlationId;
+
+        /**
+         * The api name.
+         */
+        const char *name;
+    } msptiActivityApi;
+
+    typedef struct PACKED_ALIGNMENT
+    {
+        /**
+         * The activity record kind, must be MSPTI_ACTIVITY_KIND_KERNEL.
+         */
+        msptiActivityKind kind;
+
+        /**
+         * The start timestamp for the kernel, in ns.
+         */
+        uint64_t start;
+
+        /**
+         * The end timestamp for the kernel, in ns.
+         */
+        uint64_t end;
+
+        /**
+         * A stream object requires that we identify device and stream ID.
+         */
+        struct
+        {
+            uint32_t deviceId;
+            uint32_t streamId;
+        } ds;
+
+        /**
+         * The correlation ID of the kernel.
+         */
+        uint64_t correlationId;
+
+        /**
+         * The kernel type.
+         */
+        const char *type;
+
+        /**
+         * The kernel name.
+         */
+        const char *name;
+    } msptiActivityKernel;
+
+    END_PACKED_ALIGNMENT
+
+    /**
+     * @brief Function type for callback used by MSPTI to request an empty
+     * buffer for storing activity records.
+     *
+     * This callback function signals the MSPTI client that an activity
+     * buffer is needed by MSPTI. The activity buffer is used by MSPTI to
+     * store activity records. The callback function can decline the
+     * request by setting **buffer to NULL. In this case MSPTI may drop
+     * activity records.
+     *
+     * @param buffer Returns the new buffer. If set to NULL then no buffer
+     * is returned.
+     * @param size Returns the size of the returned buffer.
+     * @param maxNumRecords Returns the maximum number of records that
+     * should be placed in the buffer. If 0 then the buffer is filled with
+     * as many records as possible. If > 0 the buffer is filled with at
+     * most that many records before it is returned.
+     */
+    typedef void (*msptiBuffersCallbackRequestFunc)(uint8_t **buffer,
+                                                    size_t *size,
+                                                    size_t *maxNumRecords);
+
+    /**
+     * @brief Function type for callback used by MSPTI to return a buffer
+     * of activity records.
+     *
+     * This callback function returns to the MSPTI client a buffer
+     * containing activity records.  The buffer contains @p validSize
+     * bytes of activity records which should be read using
+     * msptiActivityGetNextRecord. After this call MSPTI
+     * relinquishes ownership of the buffer and will not use it
+     * anymore. The client may return the buffer to MSPTI using the
+     * msptiBuffersCallbackRequestFunc callback.
+     *
+     * @param buffer The activity record buffer.
+     * @param size The total size of the buffer in bytes as set in
+     * msptiBuffersCallbackRequestFunc.
+     * @param validSize The number of valid bytes in the buffer.
+     */
+    typedef void (*msptiBuffersCallbackCompleteFunc)(uint8_t *buffer,
+                                                     size_t size,
+                                                     size_t validSize);
+
+    /**
+     * @brief Registers callback functions with MSPTI for activity buffer
+     * handling.
+     *
+     * This function registers two callback functions to be used in asynchronous
+     * buffer handling. If registered, activity record buffers are handled using
+     * asynchronous requested/completed callbacks from MSPTI.
+     *
+     * @param funcBufferRequested callback which is invoked when an empty
+     * buffer is requested by MSPTI
+     * @param funcBufferCompleted callback which is invoked when a buffer
+     * containing activity records is available from MSPTI
+     *
+     * @retval MSPTI_SUCCESS
+     * @retval MSPTI_ERROR_INVALID_PARAMETER if either
+     * funcBufferRequested or funcBufferCompleted is NULL
+     */
+    msptiResult msptiActivityRegisterCallbacks(
+        msptiBuffersCallbackRequestFunc funcBufferRequested,
+        msptiBuffersCallbackCompleteFunc funcBufferCompleted);
+
+    /**
+     * @brief Enable collection of a specific kind of activity record.
+     *
+     * Enable collection of a specific kind of activity record. Multiple
+     * kinds can be enabled by calling this function multiple times.
+     * By default, the collection of all activity types is inactive.
+     *
+     * @param kind The kind of activity record to collect
+     *
+     * @retval MSPTI_SUCCESS
+     */
+    msptiResult msptiActivityEnable(msptiActivityKind kind);
+
+    /**
+     * @brief Disable collection of a specific kind of activity record.
+     *
+     * Disable collection of a specific kind of activity record. Multiple
+     * kinds can be disabled by calling this function multiple times.
+     * By default, the collection of all activity types is inactive.
+     *
+     * @param kind The kind of activity record to stop collecting
+     *
+     * @retval MSPTI_SUCCESS
+     */
+    msptiResult msptiActivityDisable(msptiActivityKind kind);
+
+    /**
+     * @brief Iterate over the activity records in a buffer.
+     *
+     * This is a function to iterate over the activity records in buffer.
+     *
+     * @param buffer The buffer containing activity records
+     * @param validBufferSizeBytes The number of valid bytes in the buffer.
+     * @param record Inputs the previous record returned by
+     * msptiActivityGetNextRecord and returns the next activity record
+     * from the buffer. If input value is NULL, returns the first activity
+     * record in the buffer.
+     *
+     * @retval MSPTI_SUCCESS
+     * @retval MSPTI_ERROR_MAX_LIMIT_REACHED if no more records in the buffer
+     * @retval MSPTI_ERROR_INVALID_PARAMETER if buffer is NULL.
+     */
+    msptiResult msptiActivityGetNextRecord(uint8_t *buffer,
+                                           size_t validBufferSizeBytes,
+                                           msptiActivity **record);
+
+    /**
+     * @brief Request to deliver activity records via the buffer completion
+     * callback.
+     *
+     * This function returns the activity records associated with all
+     * contexts/streams (and the global buffers not associated with any stream)
+     * to the MSPTI client using the callback registered in
+     * msptiActivityRegisterCallbacks. It returns all activity buffers that
+     * contain completed activity records, even if these buffers are not
+     * completely filled.
+     *
+     * Before calling this function, the buffer handling callback api must be
+     * activated by calling msptiActivityRegisterCallbacks.
+     *
+     * @param flag Reserved for internal use.
+     *
+     * @retval MSPTI_SUCCESS
+     */
+    msptiResult msptiActivityFlushAll(uint32_t flag);
+
+#if defined(__GNUC__) && defined(MSPTI_LIB)
+#pragma GCC visibility pop
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/systrace/thirdparty/x86_64/mspti/include/mspti_callback.h b/systrace/thirdparty/x86_64/mspti/include/mspti_callback.h
new file mode 100644
index 0000000000000000000000000000000000000000..2e6f7ee2264b9e99f5f891fdc6ac3cd20d53bf66
--- /dev/null
+++ b/systrace/thirdparty/x86_64/mspti/include/mspti_callback.h
@@ -0,0 +1,258 @@
+/**
+ * @file mspti_callback.h
+ *
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef MSPTI_CALLBACK_H
+#define MSPTI_CALLBACK_H
+
+#include "mspti_cbid.h"
+#include "mspti_result.h"
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#if defined(__GNUC__) && defined(MSPTI_LIB)
+#pragma GCC visibility push(default)
+#endif
+
+    /**
+     * @brief Callback domains.
+     *
+     * Callback domains. Each domain represents callback points for a
+     * group of related API functions or CANN driver activity.
+     */
+    typedef enum
+    {
+        /**
+         * Invalid domain.
+         */
+        MSPTI_CB_DOMAIN_INVALID = 0,
+        /**
+         * Domain containing callback points for all runtime API functions.
+         */
+        MSPTI_CB_DOMAIN_RUNTIME = 1,
+        MSPTI_CB_DOMAIN_HCCL = 2, /* presumably the HCCL API callbacks (IDs in msptiCallbackIdHccl) - confirm */
+        MSPTI_CB_DOMAIN_SIZE, /* implicit value 3: one past the last domain defined above */
+        MSPTI_CB_DOMAIN_FORCE_INT = 0x7fffffff /* largest 32-bit signed value; presumably forces an int-sized enum */
+    } msptiCallbackDomain;
+
+    typedef uint32_t msptiCallbackId;
+
+    /**
+     * @brief Specifies the point in an API call that a callback is issued.
+     *
+     * Specifies the point in an API call that a callback is issued. This
+     * value is communicated to the callback function by @ref
+     * msptiCallbackData::callbackSite.
+     */
+    typedef enum
+    {
+        /**
+         * The callback is at the entry of the API call.
+         */
+        MSPTI_API_ENTER = 0,
+        /**
+         * The callback is at the exit of the API call.
+         */
+        MSPTI_API_EXIT = 1,
+        MSPTI_API_CBSITE_FORCE_INT = 0x7fffffff
+    } msptiApiCallbackSite;
+
+    typedef struct
+    {
+        /**
+         * Point in the runtime or driver function from where the callback
+         * was issued.
+         */
+        msptiApiCallbackSite callbackSite;
+
+        /**
+         * Name of the runtime or driver API function which issued the
+         * callback.
+         */
+        const char *functionName;
+
+        /**
+         * Params of the runtime or driver API function which issued the
+         * callback.
+         */
+        const void *functionParams;
+
+        /**
+         * Pointer to the return value of the runtime or driver API
+         * call.
+         */
+        const void *functionReturnValue;
+
+        /**
+         * Name of the symbol operated on by the runtime or driver API
+         * function which issued the callback. This entry is valid only for
+         * driver and runtime launch callbacks, where it returns the name of
+         * the kernel.
+         */
+        const char *symbolName;
+
+        /**
+         * The activity record correlation ID for this callback. For a
+         * driver domain callback (i.e. @p domain
+         * MSPTI_CB_DOMAIN_DRIVER_API) this ID will equal the correlation ID
+         * in the MSPTI_ActivityAPI record corresponding to the CANN driver
+         * function call. For a runtime domain callback (i.e. @p domain
+         * MSPTI_CB_DOMAIN_RUNTIME) this ID will equal the correlation
+         * ID in the MSPTI_ActivityAPI record corresponding to the CANN
+         * runtime function call. Within the callback, this ID can be
+         * recorded to correlate user data with the activity record.
+         */
+        uint64_t correlationId;
+
+        /**
+         * Undefined. Reserved for internal use.
+         */
+        uint64_t reserved1;
+
+        /**
+         * Undefined. Reserved for internal use.
+         */
+        uint64_t reserved2;
+
+        /**
+         * Pointer to data shared between the entry and exit callbacks of
+         * a given runtime or driver API function invocation. This field
+         * can be used to pass 64-bit values from the entry callback to
+         * the corresponding exit callback.
+         */
+        uint64_t *correlationData;
+    } msptiCallbackData;
+
+    /**
+     * @brief Function type for a callback.
+     *
+     * Function type for a callback. The type of the data passed to the
+     * callback in @p cbdata depends on the @p domain. If @p domain is
+     * MSPTI_CB_DOMAIN_RUNTIME the type
+     * of @p cbdata will be msptiCallbackData.
+     *
+     * @param userdata User data supplied at subscription of the callback
+     * @param domain The domain of the callback
+     * @param cbid The ID of the callback
+     * @param cbdata Data passed to the callback.
+     */
+    typedef void (*msptiCallbackFunc)(void *userdata,
+                                      msptiCallbackDomain domain,
+                                      msptiCallbackId cbid,
+                                      const msptiCallbackData *cbdata);
+
+    struct msptiSubscriber_st;
+
+    /**
+     * @brief A callback subscriber.
+     */
+    typedef struct msptiSubscriber_st *msptiSubscriberHandle;
+
+    /**
+     * @brief Initialize a callback subscriber with a callback function
+     * and user data.
+     *
+     * Initializes a callback subscriber with a callback function and
+     * (optionally) a pointer to user data. The returned subscriber handle
+     * can be used to enable and disable the callback for specific domains
+     * and callback IDs.
+     * @note Only a single subscriber can be registered at a time. To ensure
+     * that no other MSPTI client interrupts the profiling session, it's the
+     * responsibility of all the MSPTI clients to call this function before
+     * starting the profiling session.
+     * @note This function does not enable any callbacks.
+     * @note @b Thread-safety: this function is thread safe.
+     *
+     * @param subscriber handle to initialize subscriber
+     * @param callback The callback function
+     * @param userdata A pointer to user data. This data will be passed to
+     * the callback function via the @p userdata parameter.
+     *
+     * @retval MSPTI_SUCCESS on success
+     * @retval MSPTI_ERROR_INNER if unable to initialize MSPTI
+     * @retval MSPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED if there is
+     * already a MSPTI subscriber
+     * @retval MSPTI_ERROR_INVALID_PARAMETER if @p subscriber is NULL
+     */
+    msptiResult msptiSubscribe(msptiSubscriberHandle *subscriber,
+                               msptiCallbackFunc callback, void *userdata);
+
+    /**
+     * @brief Unregister a callback subscriber.
+     *
+     * Removes a callback subscriber so that no future callbacks will be
+     * issued to that subscriber.
+     *
+     * @param subscriber Handle to the initialized subscriber
+     *
+     * @retval MSPTI_SUCCESS on success
+     * @retval MSPTI_ERROR_INVALID_PARAMETER if @p subscriber is NULL or not
+     * initialized
+     */
+    msptiResult msptiUnsubscribe(msptiSubscriberHandle subscriber);
+
+    /**
+     * @brief Enable or disable callbacks for a specific domain and
+     * callback ID.
+     *
+     * Enable or disable callbacks for a subscriber for a specific domain
+     * and callback ID.
+     *
+     * @note @b Thread-safety: a subscriber must serialize access to
+     * msptiEnableCallback, msptiEnableDomain.
+     *
+     * @param enable New enable state for the callback. Zero disables the
+     * callback, non-zero enables the callback.
+     * @param subscriber Handle to callback subscription
+     * @param domain The domain of the callback
+     * @param cbid The ID of the callback
+     *
+     * @retval MSPTI_SUCCESS on success
+     * @retval MSPTI_ERROR_INVALID_PARAMETER if @p subscriber, @p domain or @p
+     * cbid is invalid.
+     */
+    msptiResult msptiEnableCallback(uint32_t enable,
+                                    msptiSubscriberHandle subscriber,
+                                    msptiCallbackDomain domain,
+                                    msptiCallbackId cbid);
+
+    /**
+     * @brief Enable or disable callbacks for a specific domain
+     *
+     * Enable or disable callbacks for a subscriber for a specific domain
+     *
+     * @note @b Thread-safety: a subscriber must serialize access to
+     * msptiEnableCallback, msptiEnableDomain.
+     *
+     * @param enable New enable state for the callback. Zero disables the
+     * callback, non-zero enables the callback.
+     * @param subscriber Handle to callback subscription
+     * @param domain The domain of the callback
+     *
+     * @retval MSPTI_SUCCESS on success
+     * @retval MSPTI_ERROR_INVALID_PARAMETER if @p subscriber, @p domain is
+     * invalid.
+     */
+    msptiResult msptiEnableDomain(uint32_t enable,
+                                  msptiSubscriberHandle subscriber,
+                                  msptiCallbackDomain domain);
+
+#if defined(__GNUC__) && defined(MSPTI_LIB)
+#pragma GCC visibility pop
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/systrace/thirdparty/x86_64/mspti/include/mspti_cbid.h b/systrace/thirdparty/x86_64/mspti/include/mspti_cbid.h
new file mode 100644
index 0000000000000000000000000000000000000000..540ad394376e5a9f6bb74fb0a53c9072a24b1a9c
--- /dev/null
+++ b/systrace/thirdparty/x86_64/mspti/include/mspti_cbid.h
@@ -0,0 +1,83 @@
+/**
+ * @file mspti_cbid.h
+ *
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef MSPTI_CBID_H
+#define MSPTI_CBID_H
+
+/**
+ * @brief Definitions of indices for Runtime API functions, unique across entire
+ * API
+ */
+typedef enum
+{
+    MSPTI_CBID_RUNTIME_INVALID = 0,
+    MSPTI_CBID_RUNTIME_DEVICE_SET = 1,
+    MSPTI_CBID_RUNTIME_DEVICE_RESET = 2,
+    MSPTI_CBID_RUNTIME_DEVICE_SET_EX = 3,
+    MSPTI_CBID_RUNTIME_CONTEXT_CREATED_EX = 4,
+    MSPTI_CBID_RUNTIME_CONTEXT_CREATED = 5,
+    MSPTI_CBID_RUNTIME_CONTEXT_DESTROY = 6,
+    MSPTI_CBID_RUNTIME_STREAM_CREATED = 7,
+    MSPTI_CBID_RUNTIME_STREAM_DESTROY = 8,
+    MSPTI_CBID_RUNTIME_STREAM_SYNCHRONIZED = 9,
+    MSPTI_CBID_RUNTIME_LAUNCH = 10,
+    MSPTI_CBID_RUNTIME_CPU_LAUNCH = 11,
+    MSPTI_CBID_RUNTIME_AICPU_LAUNCH = 12,
+    MSPTI_CBID_RUNTIME_AIV_LAUNCH = 13,
+    MSPTI_CBID_RUNTIME_FFTS_LAUNCH = 14,
+    MSPTI_CBID_RUNTIME_MALLOC = 15,
+    MSPTI_CBID_RUNTIME_FREE = 16,
+    MSPTI_CBID_RUNTIME_MALLOC_HOST = 17,
+    MSPTI_CBID_RUNTIME_FREE_HOST = 18,
+    MSPTI_CBID_RUNTIME_MALLOC_CACHED = 19,
+    MSPTI_CBID_RUNTIME_FLUSH_CACHE = 20,
+    MSPTI_CBID_RUNTIME_INVALID_CACHE = 21,
+    MSPTI_CBID_RUNTIME_MEMCPY = 22,
+    MSPTI_CBID_RUNTIME_MEMCPY_HOST = 23,
+    MSPTI_CBID_RUNTIME_MEMCPY_ASYNC = 24,
+    MSPTI_CBID_RUNTIME_MEM_CPY2D = 25,
+    MSPTI_CBID_RUNTIME_MEM_CPY2D_ASYNC = 26,
+    MSPTI_CBID_RUNTIME_MEM_SET = 27,
+    MSPTI_CBID_RUNTIME_MEM_SET_ASYNC = 28,
+    MSPTI_CBID_RUNTIME_MEM_GET_INFO = 29,
+    MSPTI_CBID_RUNTIME_RESERVE_MEM_ADDRESS = 30,
+    MSPTI_CBID_RUNTIME_RELEASE_MEM_ADDRESS = 31,
+    MSPTI_CBID_RUNTIME_MALLOC_PHYSICAL = 32,
+    MSPTI_CBID_RUNTIME_FREE_PHYSICAL = 33,
+    MSPTI_CBID_RUNTIME_MEM_EXPORT_TO_SHAREABLE_HANDLE = 34,
+    MSPTI_CBID_RUNTIME_MEM_IMPORT_FROM_SHAREABLE_HANDLE = 35,
+    MSPTI_CBID_RUNTIME_MEM_SET_PID_TO_SHAREABLE_HANDLE = 36,
+    MSPTI_CBID_RUNTIME_SIZE,
+    MSPTI_CBID_RUNTIME_FORCE_INT = 0x7fffffff
+} msptiCallbackIdRuntime;
+
+/**
+ * @brief Definitions of indices for hccl API functions
+ */
+typedef enum
+{
+    MSPTI_CBID_HCCL_INVALID = 0,
+    MSPTI_CBID_HCCL_ALLREDUCE = 1,
+    MSPTI_CBID_HCCL_BROADCAST = 2,
+    MSPTI_CBID_HCCL_ALLGATHER = 3,
+    MSPTI_CBID_HCCL_REDUCE_SCATTER = 4,
+    MSPTI_CBID_HCCL_REDUCE = 5,
+    MSPTI_CBID_HCCL_ALL_TO_ALL = 6,
+    MSPTI_CBID_HCCL_ALL_TO_ALLV = 7,
+    MSPTI_CBID_HCCL_BARRIER = 8,
+    MSPTI_CBID_HCCL_SCATTER = 9,
+    MSPTI_CBID_HCCL_SEND = 10,
+    MSPTI_CBID_HCCL_RECV = 11,
+    MSPTI_CBID_HCCL_SENDRECV = 12,
+    MSPTI_CBID_HCCL_SIZE,
+    MSPTI_CBID_HCCL_FORCE_INT = 0x7fffffff
+} msptiCallbackIdHccl;
+
+#endif
diff --git a/systrace/thirdparty/x86_64/mspti/include/mspti_result.h b/systrace/thirdparty/x86_64/mspti/include/mspti_result.h
new file mode 100644
index 0000000000000000000000000000000000000000..902647eed2e5efc7b69f2d2dd865e228d4a22d0e
--- /dev/null
+++ b/systrace/thirdparty/x86_64/mspti/include/mspti_result.h
@@ -0,0 +1,30 @@
+/**
+ * @file mspti_result.h
+ *
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef MSPTI_BASE_H
+#define MSPTI_BASE_H
+
+/**
+ * @brief MSPTI result codes.
+ *
+ * Error and result codes returned by MSPTI functions.
+ */
+typedef enum
+{
+    MSPTI_SUCCESS = 0,
+    MSPTI_ERROR_INVALID_PARAMETER = 1, /* e.g. a NULL buffer or subscriber, or an invalid domain or cbid */
+    MSPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED = 2, /* msptiSubscribe: a subscriber already exists */
+    MSPTI_ERROR_MAX_LIMIT_REACHED = 3, /* msptiActivityGetNextRecord: no more records in the buffer */
+    MSPTI_ERROR_DEVICE_OFFLINE = 4,
+    MSPTI_ERROR_INNER = 999, /* msptiSubscribe: unable to initialize MSPTI */
+    MSPTI_ERROR_FOECE_INT = 0x7fffffff /* sic "FOECE": the spelling is part of the declared API, so it is kept */
+} msptiResult;
+
+#endif
diff --git a/systrace/thirdparty/x86_64/mspti/lib64/libmspti.so b/systrace/thirdparty/x86_64/mspti/lib64/libmspti.so
new file mode 100644
index 0000000000000000000000000000000000000000..79f2ec422f26585fd16f8b9ff95318447f8458fc
Binary files /dev/null and b/systrace/thirdparty/x86_64/mspti/lib64/libmspti.so differ
diff --git a/systrace/watchdog/watchdog.py b/systrace/watchdog/watchdog.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d594523692df0efd882bbeff5a3adaba1d21b56
--- /dev/null
+++ b/systrace/watchdog/watchdog.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+"""
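+System monitoring script v1.0
+Features: monitoring of two config files, environment-variable updates, and escape (kill-switch) management of the sysTrace process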
+"""
+import os
+import json
+import time
+import signal
+import logging
+import argparse
+import subprocess
+import threading
+from schedule import every, run_pending
+from datetime import datetime
+
+# Global configuration parameters
+DEFAULT_TRACE_FILE = "trace.json"  # default for --trace: the JSON config polled by the monitor
+DEFAULT_LOOP_TIME = 3  # interval, in seconds, at which each scheduled job is run
+DEFAULT_LOG_FILE = "log.json"  # default for --tracelog: the file scanned for the sysTrace marker string
+ENV_MAPPING = {  # key in the trace config JSON -> name of the environment variable that receives its value
+    "sysTraceD_L0": "sysTraceD_L0",
+    "sysTraceD_L1": "sysTraceD_L1",
+    "sysTraceD_L2": "sysTraceD_L2",
+    "sysTraceD_L3": "sysTraceD_L3",
+    "escape_switch": "sysTrace_Escape"
+}
+
+class SystemMonitor:
+    def __init__(self, trace_file):  # NOTE(review): trace_file is only stored; later reads use self.args.trace
+        self.trace_file = trace_file
+        self.sysTrace_pid = None  # PID of the spawned sysTrace process; None while none is tracked
+        self._setup_logging()  # configure logging before the steps below emit records
+        self._parse_args()  # fills self.args from the command line
+        self.start_sysTrace()  # constructing the monitor has the side effect of launching sysTrace
+
+    def _setup_logging(self):
+        """配置日志记录"""
+        logging.basicConfig(
+            level=logging.INFO,
+            format='%(asctime)s - %(levelname)s: %(message)s',
+            handlers=[logging.FileHandler('monitor.log'), logging.StreamHandler()]
+        )
+
+    def _parse_args(self):
+        """解析命令行参数"""
+        parser = argparse.ArgumentParser(description='System Monitor Daemon')
+        parser.add_argument('--trace', type=str, default=DEFAULT_TRACE_FILE, 
+                          help='Path to trace config (default: trace.json)')
+        parser.add_argument('--tracelog', type=str, default=DEFAULT_LOG_FILE,
+                          help='Path to trace config (default: log.json)')
+        self.args = parser.parse_args()
+
+    def start_sysTrace(self):
+        """启动监控进程"""
+        try:
+            proc = subprocess.Popen(["sysTrace"], stdout=subprocess.PIPE)
+            self.sysTrace_pid = proc.pid
+            logging.info(f"Started sysTrace (PID: {self.sysTrace_pid})")
+        except FileNotFoundError:
+            logging.error("sysTrace executable not found in PATH")
+
+    def kill_sysTrace(self):
+        """终止监控进程"""
+        if self.sysTrace_pid and os.path.exists(f"/proc/{self.sysTrace_pid}"):
+            os.kill(self.sysTrace_pid, signal.SIGTERM)
+            logging.info(f"Killed sysTrace (PID: {self.sysTrace_pid})")
+            self.sysTrace_pid = None
+
+    @staticmethod
+    def validate_json(file_path):
+        """验证JSON文件格式"""
+        try:
+            with open(file_path) as f:
+                json.load(f)
+                return True
+        except (json.JSONDecodeError, FileNotFoundError) as e:
+            logging.error(f"Invalid JSON {file_path}: {str(e)}")
+            return False
+
+    def check_systrace_stream(self, file_path, buffer_size=4096):
+        """内存优化的流式检查"""
+        target = "i am sysTrace"
+        window = ""
+    
+        try:
+            with open(file_path, 'r') as f:
+                while (chunk := f.read(buffer_size)) :
+                    window += chunk
+                    if target in window:
+                        logging.info(f"find str")
+                        return True
+                    # 保留可能跨分块的尾部字符
+                    window = window[-len(target):] if len(window) > len(target) else window
+            return False
+        except FileNotFoundError:
+            print(f"文件 {file_path} 不存在")
+            return False
+
+    def update_env_vars(self):
+        """更新环境变量"""
+        if not self.validate_json(self.args.trace):
+            return
+        try:
+            with open(self.args.trace, 'r') as f:
+                data = json.load(f)
+                for key, env_var in ENV_MAPPING.items():
+                    value = str(data.get(key, "false")).lower()
+                    os.environ[env_var] = value
+                    logging.info(f"Set {env_var}={value}")
+        except Exception as e:
+            logging.error(f"Env update failed: {str(e)}")
+
+    def check_escape_trigger(self):
+        """检查逃生开关"""
+        if not self.validate_json(self.args.trace):
+            return
+        try:
+            with open(self.args.trace, 'r') as f:
+                data = json.load(f)
+                if data.get("escape_switch", False) is True:
+                    self.kill_sysTrace()
+        except Exception as e:
+            logging.error(f"Escape check failed: {str(e)}")
+
+    def scheduler_task(self):
+        """定时任务调度"""
+        every(DEFAULT_LOOP_TIME).seconds.do(self.update_env_vars)
+        every(DEFAULT_LOOP_TIME).seconds.do(self.check_escape_trigger)
+        every(DEFAULT_LOOP_TIME).seconds.do(self.check_systrace_stream, self.args.tracelog)
+        while True:
+            run_pending()
+            time.sleep(1)
+
+    def run(self):
+        """Main loop: start the scheduler thread, then idle until KeyboardInterrupt and stop sysTrace on exit."""
+        threading.Thread(target=self.scheduler_task, daemon=True).start()  # daemon thread: does not block interpreter exit
+        try:
+            while True:
+                time.sleep(3600)  # the main thread only waits; the polling runs in the scheduler thread
+        except KeyboardInterrupt:
+            self.kill_sysTrace()
+            logging.info("Service stopped")
+
+if __name__ == "__main__":
+    monitor = SystemMonitor(DEFAULT_TRACE_FILE)  # construction also parses the CLI and launches sysTrace
+    monitor.run()  # blocks until KeyboardInterrupt
\ No newline at end of file