From 2f8f4cebd086cf9ae14619c5d93bfff6ca67ee0d Mon Sep 17 00:00:00 2001 From: weli-l <1289113577@qq.com> Date: Mon, 7 Jul 2025 16:36:10 +0800 Subject: [PATCH 1/4] add proto2 version for systrace Signed-off-by: weli-l <1289113577@qq.com> --- [NOTE(review): build.sh now reads the major number from `protoc --version` and selects systrace.v3.proto when it is >= 3, otherwise systrace.v2.proto. PROTO_EXTRA_OPT is assigned but not read anywhere in this hunk. If `protoc --version` prints nothing, the -ge test receives an empty operand and presumably falls through to the v2 branch - confirm that fallback is intended.] systrace/build.sh | 17 ++++- systrace/protos/systrace.v2.proto | 76 +++++++++++++++++++ .../{systrace.proto => systrace.v3.proto} | 0 3 files changed, 90 insertions(+), 3 deletions(-) create mode 100644 systrace/protos/systrace.v2.proto rename systrace/protos/{systrace.proto => systrace.v3.proto} (100%) diff --git a/systrace/build.sh b/systrace/build.sh index 65495c9..1d1d9bc 100644 --- a/systrace/build.sh +++ b/systrace/build.sh @@ -10,11 +10,22 @@ rm src/os/*.skel.h -rf cp -f config/PyFuncList /etc/systrace/config/PyFuncList +PROTOC_VERSION=$(protoc --version | awk '{print $2}' | cut -d. -f1) +PROTO_FILE="" +PROTO_EXTRA_OPT="" + +if [ "$PROTOC_VERSION" -ge 3 ]; then + PROTO_FILE="systrace.v3.proto" +else + PROTO_FILE="systrace.v2.proto" +fi + cd protos -protoc --c_out=. systrace.proto -protoc --cpp_out=. systrace.proto -protoc --python_out=. systrace.proto +protoc --c_out=. $PROTO_FILE +protoc --cpp_out=. $PROTO_FILE +protoc --python_out=. $PROTO_FILE cd .. + cd build cmake .. 
make -j $(nproc) \ No newline at end of file diff --git a/systrace/protos/systrace.v2.proto b/systrace/protos/systrace.v2.proto new file mode 100644 index 0000000..f583d0a --- /dev/null +++ b/systrace/protos/systrace.v2.proto @@ -0,0 +1,76 @@ +syntax = "proto2"; + +message StackFrame { + optional uint64 address = 1; + optional string so_name = 2; +} + +message MemAllocEntry { + optional uint64 alloc_ptr = 1; + optional uint32 stage_id = 2; + optional StageType stage_type = 3; + optional uint64 mem_size = 4; + repeated StackFrame stack_frames = 5; +} + +message MemFreeEntry { + optional uint64 alloc_ptr = 1; + optional uint32 stage_id = 2; + optional StageType stage_type = 3; +} + +message ProcMem { + optional uint32 pid = 1; + repeated MemAllocEntry mem_alloc_stacks = 2; + repeated MemFreeEntry mem_free_stacks = 3; +} + +enum StageType { + STAGE_UNKNOWN = 0; + STAGE_DATALOADER = 1; + STAGE_FORWARD = 2; + STAGE_BACKWARD = 3; + STAGE_SYNCHRONIZATION = 4; + STAGE_GC = 5; +} + +message GcDebugData { + optional uint32 collected = 1; + optional uint32 uncollectable = 2; +} + +message PytorchStage { + optional uint32 stage_id = 1; + optional string stage_type = 2; + optional uint64 start_us = 3; + optional uint64 end_us = 4; + repeated string stack_frames = 5; + optional GcDebugData gc_debug = 6; +} + +message Pytorch { + repeated PytorchStage pytorch_stages = 1; + optional uint32 rank = 2; + optional uint32 step_id = 3; + optional string comm = 4; // task name +} + +message Mem { + repeated ProcMem proc_mem = 1; +} + +message OSprobe { + repeated OSprobeEntry OSprobe_entries = 1; +} + +message OSprobeEntry { + optional uint32 key = 1; + optional uint64 start_us = 2; + optional uint64 dur = 3; + optional uint64 rundelay = 4; + optional uint32 OS_event_type = 5; + optional uint32 rank = 6; + optional string comm = 7; + optional string nxt_comm = 8; + optional uint32 nxt_pid = 9; +} diff --git a/systrace/protos/systrace.proto b/systrace/protos/systrace.v3.proto similarity index 
100% rename from systrace/protos/systrace.proto rename to systrace/protos/systrace.v3.proto -- Gitee From 926933d9c7ed5fd9852643baf4d6e10641d7887b Mon Sep 17 00:00:00 2001 From: weli-l <1289113577@qq.com> Date: Thu, 10 Jul 2025 17:24:28 +0800 Subject: [PATCH 2/4] add merge_json_by_rank.py Signed-off-by: weli-l <1289113577@qq.com> --- [NOTE(review): merge_events_by_pid discards the iterator returned by executor.map, so an exception raised inside process_file (unreadable file, invalid JSON, missing 'traceEvents' or 'pid' key) is never re-raised and the merge finishes without reporting the failure. save_merged_data sorts each PID's events on x['ts'] and will raise KeyError for any event that lacks a 'ts' key. The `nonlocal pid_map` declaration is not needed because pid_map is only mutated, never rebound.] systrace/convert/merge_json_by_rank.py | 60 ++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 systrace/convert/merge_json_by_rank.py diff --git a/systrace/convert/merge_json_by_rank.py b/systrace/convert/merge_json_by_rank.py new file mode 100644 index 0000000..bc0a24f --- /dev/null +++ b/systrace/convert/merge_json_by_rank.py @@ -0,0 +1,60 @@ +import json +import argparse +from concurrent.futures import ThreadPoolExecutor +from collections import defaultdict +import threading + +""" +Usage: +python merge_json_by_rank.py file1.json file2.json [...] --output merged.json +""" + +def load_json_file(file_path): + with open(file_path, 'r') as f: + return json.load(f) + +def merge_events_by_pid(files): + pid_map = defaultdict(list) + lock = threading.Lock() + + def process_file(file_path): + nonlocal pid_map + data = load_json_file(file_path) + with lock: + for event in data['traceEvents']: + pid_map[event['pid']].append(event) + + with ThreadPoolExecutor() as executor: + executor.map(process_file, files) + + return pid_map + +def save_merged_data(pid_map, output_file): + merged_events = [] + for pid in sorted(pid_map.keys()): + pid_events = sorted(pid_map[pid], key=lambda x: x['ts']) + merged_events.extend(pid_events) + + merged_data = { + "traceEvents": merged_events, + "displayTimeUnit": "ns", + "metadata": { + "format": "Merged Trace Data", + "merged_pids": len(pid_map), + "total_events": len(merged_events) + } + } + + with open(output_file, 'w') as f: + json.dump(merged_data, f, indent=None, separators=(',', ':')) + + print(f"Merged {len(pid_map)} PIDs with total {len(merged_events)} events to 
{output_file}") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Merge JSON trace files by PID with multithreading') + parser.add_argument('input_files', nargs='+', help='Input JSON files to merge') + parser.add_argument('--output', required=True, help='Output JSON file path') + args = parser.parse_args() + + pid_map = merge_events_by_pid(args.input_files) + save_merged_data(pid_map, args.output) \ No newline at end of file -- Gitee From 9616334eff52679a6a2582f45f043587356fb3ce Mon Sep 17 00:00:00 2001 From: weli-l <1289113577@qq.com> Date: Sat, 12 Jul 2025 15:52:32 +0800 Subject: [PATCH 3/4] fix compile err Signed-off-by: weli-l <1289113577@qq.com> --- [NOTE(review): this drops the $PROTO_FILE selection and instead renames the chosen versioned file to systrace.proto with mv before running protoc. After build.sh runs, the selected systrace.v3.proto or systrace.v2.proto no longer exists under its original name, so the mv presumably fails on a later run unless the tree is reset - confirm whether a copy was intended. PROTO_FILE and PROTO_EXTRA_OPT stay assigned but are not read in this hunk.] systrace/build.sh | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/systrace/build.sh b/systrace/build.sh index 1d1d9bc..32cecad 100644 --- a/systrace/build.sh +++ b/systrace/build.sh @@ -14,16 +14,15 @@ PROTOC_VERSION=$(protoc --version | awk '{print $2}' | cut -d. -f1) PROTO_FILE="" PROTO_EXTRA_OPT="" +cd protos if [ "$PROTOC_VERSION" -ge 3 ]; then - PROTO_FILE="systrace.v3.proto" + mv systrace.v3.proto systrace.proto else - PROTO_FILE="systrace.v2.proto" + mv systrace.v2.proto systrace.proto fi - -cd protos -protoc --c_out=. $PROTO_FILE -protoc --cpp_out=. $PROTO_FILE -protoc --python_out=. $PROTO_FILE +protoc --c_out=. systrace.proto +protoc --cpp_out=. systrace.proto +protoc --python_out=. systrace.proto cd .. 
cd build -- Gitee From dbc443404f751bc4959cb1fcab89cc6dff1e482c Mon Sep 17 00:00:00 2001 From: weli-l <1289113577@qq.com> Date: Sat, 12 Jul 2025 16:07:26 +0800 Subject: [PATCH 4/4] fix ringbuf err Signed-off-by: weli-l <1289113577@qq.com> --- [NOTE(review): adds cases 8 through 15 to the switch in emit_event; each one passes the event to bpf_ringbuf_output on osprobe_map_8 .. osprobe_map_15, mirroring the existing case 7. The declarations of those maps and the switch expression lie outside this hunk - confirm the maps exist. The default branch still drops the event without reporting it.] systrace/src/os/bpf_comm.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/systrace/src/os/bpf_comm.h b/systrace/src/os/bpf_comm.h index 90b1a58..0fbe65a 100644 --- a/systrace/src/os/bpf_comm.h +++ b/systrace/src/os/bpf_comm.h @@ -182,6 +182,30 @@ static __always_inline void emit_event(trace_event_data_t *event, void *ctx) case 7: bpf_ringbuf_output(&osprobe_map_7, event, sizeof(*event), 0); break; + case 8: + bpf_ringbuf_output(&osprobe_map_8, event, sizeof(*event), 0); + break; + case 9: + bpf_ringbuf_output(&osprobe_map_9, event, sizeof(*event), 0); + break; + case 10: + bpf_ringbuf_output(&osprobe_map_10, event, sizeof(*event), 0); + break; + case 11: + bpf_ringbuf_output(&osprobe_map_11, event, sizeof(*event), 0); + break; + case 12: + bpf_ringbuf_output(&osprobe_map_12, event, sizeof(*event), 0); + break; + case 13: + bpf_ringbuf_output(&osprobe_map_13, event, sizeof(*event), 0); + break; + case 14: + bpf_ringbuf_output(&osprobe_map_14, event, sizeof(*event), 0); + break; + case 15: + bpf_ringbuf_output(&osprobe_map_15, event, sizeof(*event), 0); + break; default: break; } -- Gitee